Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,9 @@ from st_keyup import st_keyup
|
|
| 13 |
|
| 14 |
# What does persist = disk do ?
|
| 15 |
# @st.cache_data(persist="disk")
|
|
|
|
|
|
|
|
|
|
| 16 |
@st.cache_data
|
| 17 |
def load_pandas_xlsx(path):
|
| 18 |
data = pd.read_excel(path)
|
|
@@ -24,6 +27,8 @@ def build_company_df(input_df):
|
|
| 24 |
output_df = input_df[['companyLabel', 'companyLabelJA', 'company']].drop_duplicates()
|
| 25 |
return output_df
|
| 26 |
|
|
|
|
|
|
|
| 27 |
@st.cache_data
|
| 28 |
def build_industry_df(input_df):
|
| 29 |
# Pre compute unique number of companies per industry
|
|
@@ -31,6 +36,8 @@ def build_industry_df(input_df):
|
|
| 31 |
output_df = output_df.rename(columns={'company': 'n_competitors'})
|
| 32 |
return output_df
|
| 33 |
|
|
|
|
|
|
|
| 34 |
@st.cache_data
|
| 35 |
def build_product_df(input_df):
|
| 36 |
# Pre compute unique number of companies per product
|
|
@@ -38,6 +45,58 @@ def build_product_df(input_df):
|
|
| 38 |
output_df = output_df.rename(columns={'company': 'n_competitors'})
|
| 39 |
return output_df
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def search_df(inp, df, col):
|
| 42 |
mask = df[col].str.contains(inp, case=False, regex=False)
|
| 43 |
select_df = df[mask]
|
|
@@ -45,9 +104,14 @@ def search_df(inp, df, col):
|
|
| 45 |
|
| 46 |
##### Data Logic #####
|
| 47 |
|
|
|
|
| 48 |
COMPETITOR_PATH = 'data/merged_competitors_all_20241122.xlsx'
|
| 49 |
INDUSTRY_PATH = 'data/industry_hierarchy_20241125.xlsx'
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# Load data
|
| 52 |
with st.spinner(text="Loading competitor data ..."):
|
| 53 |
competitor_df = load_pandas_xlsx(COMPETITOR_PATH)
|
|
@@ -65,41 +129,26 @@ industry_hierarchy = load_pandas_xlsx(INDUSTRY_PATH)
|
|
| 65 |
### Pre computation Steps ###
|
| 66 |
|
| 67 |
# Pre compute unique number of companies per industry
|
| 68 |
-
# industry_to_counts = competitor_df[['company', 'companyLabel', 'companyLabelJA', 'industry', 'industryLabel', 'industryLabelJA']].drop_duplicates().groupby(['industry', 'industryLabel', 'industryLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
|
| 69 |
-
# industry_to_counts = industry_to_counts.rename(columns={'company': 'n_competitors'})
|
| 70 |
-
|
| 71 |
industry_to_counts = build_industry_df(competitor_df)
|
| 72 |
|
| 73 |
# Pre compute unique number of companies per product
|
| 74 |
-
# product_to_counts = competitor_df[['company', 'companyLabel', 'companyLabelJA', 'product', 'productLabel', 'productLabelJA']].drop_duplicates().groupby(['product', 'productLabel', 'productLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
|
| 75 |
-
# product_to_counts = product_to_counts.rename(columns={'company': 'n_competitors'})
|
| 76 |
-
|
| 77 |
product_to_counts = build_product_df(competitor_df)
|
| 78 |
|
| 79 |
-
### end ###
|
| 80 |
-
|
| 81 |
-
# Parse Data
|
| 82 |
-
###with st.spinner(text="Computing unique companies ..."):
|
| 83 |
-
### unique_companies = list(competitor_data.companyLabel.unique())
|
| 84 |
-
### st.success("Unique Companies Done!")
|
| 85 |
-
|
| 86 |
-
import streamlit as st
|
| 87 |
|
| 88 |
# Title
|
| 89 |
-
st.title('3C Competitor Analysis Demo')
|
| 90 |
|
| 91 |
option = st.selectbox(
|
| 92 |
-
"
|
| 93 |
-
("
|
| 94 |
)
|
| 95 |
|
| 96 |
st.write("You selected:", option)
|
| 97 |
|
| 98 |
-
|
| 99 |
##### App Logic #####
|
| 100 |
|
| 101 |
-
|
| 102 |
-
if option == "By Company":
|
| 103 |
|
| 104 |
st.title("Searching by Company")
|
| 105 |
|
|
@@ -201,7 +250,7 @@ if option == "By Company":
|
|
| 201 |
st.dataframe(competitors_by_country)
|
| 202 |
|
| 203 |
|
| 204 |
-
elif option == "
|
| 205 |
|
| 206 |
st.title("Searching by Industry")
|
| 207 |
|
|
@@ -278,7 +327,7 @@ elif option == "By Industry":
|
|
| 278 |
st.graphviz_chart(graph)
|
| 279 |
|
| 280 |
|
| 281 |
-
elif option == "
|
| 282 |
|
| 283 |
st.title("Searching by Product")
|
| 284 |
|
|
@@ -308,6 +357,178 @@ elif option == "By Product":
|
|
| 308 |
competitors = competitor_df[competitor_df['product'] == product['product']][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
|
| 309 |
st.dataframe(competitors)
|
| 310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
else:
|
| 313 |
|
|
|
|
| 13 |
|
| 14 |
# What does persist = disk do ?
|
| 15 |
# @st.cache_data(persist="disk")
|
| 16 |
+
|
| 17 |
+
### For competitor analysis
|
| 18 |
+
|
| 19 |
@st.cache_data
|
| 20 |
def load_pandas_xlsx(path):
|
| 21 |
data = pd.read_excel(path)
|
|
|
|
| 27 |
output_df = input_df[['companyLabel', 'companyLabelJA', 'company']].drop_duplicates()
|
| 28 |
return output_df
|
| 29 |
|
| 30 |
+
### For industry analysis
|
| 31 |
+
|
| 32 |
@st.cache_data
|
| 33 |
def build_industry_df(input_df):
|
| 34 |
# Pre compute unique number of companies per industry
|
|
|
|
| 36 |
output_df = output_df.rename(columns={'company': 'n_competitors'})
|
| 37 |
return output_df
|
| 38 |
|
| 39 |
+
### For product analysis
|
| 40 |
+
|
| 41 |
@st.cache_data
|
| 42 |
def build_product_df(input_df):
|
| 43 |
# Pre compute unique number of companies per product
|
|
|
|
| 45 |
output_df = output_df.rename(columns={'company': 'n_competitors'})
|
| 46 |
return output_df
|
| 47 |
|
| 48 |
+
### For customer analysis
|
| 49 |
+
|
| 50 |
+
@st.cache_data
def build_company_product_kg(company_product_path, product_manufacturer_path):
    """Load two company/product relation CSVs and stack them into one frame.

    Args:
        company_product_path: Path of the first CSV, read with ``pd.read_csv``.
        product_manufacturer_path: Path of the second CSV, read the same way.

    Returns:
        The rows of both files concatenated with ``pd.concat`` (rows of the
        first file come first). Each file keeps its own index labels, so
        labels may repeat in the result.
    """
    company_product_df = pd.read_csv(company_product_path)
    product_manufacturer_df = pd.read_csv(product_manufacturer_path)

    return pd.concat([company_product_df, product_manufacturer_df])
|
| 57 |
+
|
| 58 |
+
@st.cache_data
def build_company_df_2(input_df):
    """Return the distinct companies found in *input_df*.

    Args:
        input_df: DataFrame containing the ``companyLabel``,
            ``companyLabelJA`` and ``company`` columns.

    Returns:
        A DataFrame with only those three columns, duplicate rows dropped.
    """
    company_columns = ['companyLabel', 'companyLabelJA', 'company']
    return input_df[company_columns].drop_duplicates()
|
| 63 |
+
|
| 64 |
+
# Maps Wikidata direct-property URIs to the short relation names used as
# ``propertyLabel`` values when the product knowledge-graph frames are built.
property_mapping = {
    'http://www.wikidata.org/prop/direct/P186': 'made_from_material',
    'http://www.wikidata.org/prop/direct/P527': 'has_part',
    'http://www.wikidata.org/prop/direct/P2283': 'uses',
    'http://www.wikidata.org/prop/direct/P31': 'instance_of',
    'http://www.wikidata.org/prop/direct/P366': 'has_use',
    'http://www.wikidata.org/prop/direct/P361': 'part_of',
}
|
| 72 |
+
|
| 73 |
+
@st.cache_data
def build_product_kg_df():
    """Build the product-to-product relation table from four CSV exports.

    The four files are read in a fixed order, concatenated, and exact
    duplicate rows are dropped. The ``propertyLabel`` column is then rewritten
    from the raw property value to the short name given by
    ``property_mapping``.

    Returns:
        The combined DataFrame with ``propertyLabel`` holding short relation
        names.

    Raises:
        KeyError: If a ``propertyLabel`` value is not a key of
            ``property_mapping`` (the lookup is a plain dict subscript).
    """
    relation_paths = [
        'data/product_manufacturer_relations_haspart_uses_madefrom_out.csv',
        'data/product_relations_haspart_uses_madefrom_out.csv',
        'data/product_relations_partof_hasuse_out.csv',
        'data/product_manufacturer_relations_hasuse_partof_out.csv',
    ]

    product_kg_df = pd.concat([pd.read_csv(path) for path in relation_paths]).drop_duplicates()
    # Strict lookup on purpose: an unmapped property raises instead of being
    # silently carried through.
    product_kg_df['propertyLabel'] = product_kg_df.propertyLabel.apply(lambda x: property_mapping[x])

    return product_kg_df
|
| 86 |
+
|
| 87 |
+
@st.cache_data
def build_product_instance_df():
    """Build the product instance relation table from two CSV exports.

    The two files are read in a fixed order, concatenated, and exact duplicate
    rows are dropped. The ``propertyLabel`` column is then rewritten from the
    raw property value to the short name given by ``property_mapping``.

    Returns:
        The combined DataFrame with ``propertyLabel`` holding short relation
        names.

    Raises:
        KeyError: If a ``propertyLabel`` value is not a key of
            ``property_mapping`` (the lookup is a plain dict subscript).
    """
    instance_paths = [
        'data/product_relations_instance_out.csv',
        'data/product_manufacturer_relations_instance_out.csv',
    ]

    product_instance_df = pd.concat([pd.read_csv(path) for path in instance_paths]).drop_duplicates()
    # Strict lookup on purpose: an unmapped property raises instead of being
    # silently carried through.
    product_instance_df['propertyLabel'] = product_instance_df.propertyLabel.apply(lambda x: property_mapping[x])

    return product_instance_df
|
| 97 |
+
|
| 98 |
+
### For searching
|
| 99 |
+
|
| 100 |
def search_df(inp, df, col):
|
| 101 |
mask = df[col].str.contains(inp, case=False, regex=False)
|
| 102 |
select_df = df[mask]
|
|
|
|
| 104 |
|
| 105 |
##### Data Logic #####
|
| 106 |
|
| 107 |
+
# For competitor and industry analysis
|
| 108 |
COMPETITOR_PATH = 'data/merged_competitors_all_20241122.xlsx'
|
| 109 |
INDUSTRY_PATH = 'data/industry_hierarchy_20241125.xlsx'
|
| 110 |
|
| 111 |
+
# For customer analysis
|
| 112 |
+
COMPANY_PRODUCT_PATH = 'data/company_product_pairs.csv'
|
| 113 |
+
PRODUCT_MANUFACTURER_PATH = 'data/product_manufacturer_pair.csv'
|
| 114 |
+
|
| 115 |
# Load data
|
| 116 |
with st.spinner(text="Loading competitor data ..."):
|
| 117 |
competitor_df = load_pandas_xlsx(COMPETITOR_PATH)
|
|
|
|
| 129 |
### Pre computation Steps ###
|
| 130 |
|
| 131 |
# Pre compute unique number of companies per industry
|
|
|
|
|
|
|
|
|
|
| 132 |
industry_to_counts = build_industry_df(competitor_df)
|
| 133 |
|
| 134 |
# Pre compute unique number of companies per product
|
|
|
|
|
|
|
|
|
|
| 135 |
product_to_counts = build_product_df(competitor_df)
|
| 136 |
|
| 137 |
+
### end ###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# Title
|
| 140 |
+
st.title('3C Competitor / Customer Analysis Demo')
|
| 141 |
|
| 142 |
option = st.selectbox(
|
| 143 |
+
"Analysis Mode",
|
| 144 |
+
("Customer Analysis", "Competitor Analysis", "Industry Analysis", "Product Analysis")
|
| 145 |
)
|
| 146 |
|
| 147 |
st.write("You selected:", option)
|
| 148 |
|
|
|
|
| 149 |
##### App Logic #####
|
| 150 |
|
| 151 |
+
if option == "Competitor Analysis":
|
|
|
|
| 152 |
|
| 153 |
st.title("Searching by Company")
|
| 154 |
|
|
|
|
| 250 |
st.dataframe(competitors_by_country)
|
| 251 |
|
| 252 |
|
| 253 |
+
elif option == "Industry Analysis":
|
| 254 |
|
| 255 |
st.title("Searching by Industry")
|
| 256 |
|
|
|
|
| 327 |
st.graphviz_chart(graph)
|
| 328 |
|
| 329 |
|
| 330 |
+
elif option == "Product Analysis":
|
| 331 |
|
| 332 |
st.title("Searching by Product")
|
| 333 |
|
|
|
|
| 357 |
competitors = competitor_df[competitor_df['product'] == product['product']][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
|
| 358 |
st.dataframe(competitors)
|
| 359 |
|
| 360 |
+
elif option == "Customer Analysis":
|
| 361 |
+
|
| 362 |
+
# Load data
|
| 363 |
+
with st.spinner(text="Build company product knowledge graph ..."):
|
| 364 |
+
company_product_kg_df = build_company_product_kg(COMPANY_PRODUCT_PATH, PRODUCT_MANUFACTURER_PATH)
|
| 365 |
+
company_df_2 = build_company_df_2(company_product_kg_df)
|
| 366 |
+
st.success("Company Product Knowledge Graph Loaded!")
|
| 367 |
+
|
| 368 |
+
with st.spinner(text="Build product relationship knowledge graph ..."):
|
| 369 |
+
product_kg_df = build_product_kg_df()
|
| 370 |
+
st.success("Product Relationship Knowledge Graph Loaded!")
|
| 371 |
+
|
| 372 |
+
with st.spinner(text="Build product instance knowledge graph ..."):
|
| 373 |
+
product_instance_df = build_product_instance_df()
|
| 374 |
+
st.success("Product Instance Knowledge Graph Loaded!")
|
| 375 |
+
|
| 376 |
+
### Search Start
|
| 377 |
+
|
| 378 |
+
st.title("Searching by Company")
|
| 379 |
+
|
| 380 |
+
# Get input
|
| 381 |
+
inp = st_keyup("Enter a company name", value="toshiba", key="0", debounce=500)
|
| 382 |
+
|
| 383 |
+
# Perform search
|
| 384 |
+
select_df = search_df(inp, company_df_2, 'companyLabel')
|
| 385 |
+
|
| 386 |
+
# def show_data():
|
| 387 |
+
# select_value = st.session_state.value
|
| 388 |
+
# row_id=select_value['selection']['rows'][0]
|
| 389 |
+
# st.write(company_df.iloc[row_id])
|
| 390 |
+
|
| 391 |
+
# Show search results
|
| 392 |
+
with st.status("Searching ...", state="running", expanded=False) as status:
|
| 393 |
+
status.update(label=f"{len(select_df)} results found", state="complete", expanded=True)
|
| 394 |
+
|
| 395 |
+
### Selection for Company ###
|
| 396 |
+
st.dataframe(select_df, on_select="rerun", key="value", selection_mode="single-row")
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
# Expand if company is selected
|
| 400 |
+
select_value = st.session_state.value
|
| 401 |
+
if len(select_value['selection']['rows']) > 0:
|
| 402 |
+
|
| 403 |
+
st.title("Company Data")
|
| 404 |
+
|
| 405 |
+
row_id = select_value['selection']['rows'][0]
|
| 406 |
+
|
| 407 |
+
row = select_df.iloc[row_id]
|
| 408 |
+
|
| 409 |
+
entries = company_product_kg_df[company_product_kg_df.company == row.company]
|
| 410 |
+
|
| 411 |
+
st.write(f"Company Name: {row.companyLabel}")
|
| 412 |
+
st.write(f"Japanese Name: {row.companyLabelJA}")
|
| 413 |
+
st.write(f"Wikidata URL: {row.company}")
|
| 414 |
+
|
| 415 |
+
# st.write(f"Products or Services Provided: {list(set(list(entries.productLabel.unique()) + list(entries.productLabelJA.unique())))}")
|
| 416 |
+
|
| 417 |
+
st.write(f"Products and services provided by {row.companyLabel}")
|
| 418 |
+
|
| 419 |
+
product_select_df = company_product_kg_df[(company_product_kg_df.company == row.company) & (company_product_kg_df.propertyLabel == 'product_or_service_provided')][['productLabel', 'productLabelJA', 'product', 'company', 'companyLabel', 'companyLabelJA']]
|
| 420 |
+
|
| 421 |
+
### Selection for Product ###
|
| 422 |
+
st.dataframe(product_select_df, on_select="rerun", key="product", selection_mode="single-row")
|
| 423 |
+
select_product = st.session_state.product
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
# Expand if a product is selected
|
| 427 |
+
if len(select_product['selection']['rows']) > 0:
|
| 428 |
+
|
| 429 |
+
product_id = select_product['selection']['rows'][0]
|
| 430 |
+
|
| 431 |
+
target_product = product_select_df.iloc[product_id]
|
| 432 |
+
|
| 433 |
+
# st.title(f"All Product Categories produced by {row.companyLabel}")
|
| 434 |
+
# st.dataframe(competitors)
|
| 435 |
+
|
| 436 |
+
# Hypothesis
|
| 437 |
+
# for incoming relations: 'uses' or 'has_part' are useful, since they list services that have the selected product as a component
|
| 438 |
+
# for outgoing relations: 'has_use' and 'part_of' are useful, since they list services that have the selected product as a component
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
####### Build kg paths
|
| 442 |
+
|
| 443 |
+
### Step 1 ###
|
| 444 |
+
|
| 445 |
+
start_df = pd.DataFrame()
|
| 446 |
+
start_df[['company_start', 'companyLabel_start', 'companyLabelJA_start', 'product_start', 'productLabel_start', 'productLabelJA_start']] = [[target_product.company, target_product.companyLabel, target_product.companyLabelJA, target_product['product'], target_product.productLabel, target_product.productLabelJA]]
|
| 447 |
+
|
| 448 |
+
### Step 2 ###
|
| 449 |
+
|
| 450 |
+
related_out_df = product_kg_df[(product_kg_df['product'] == target_product['product']) & (product_kg_df['propertyLabel'].apply(lambda x: x in ['has_use', 'part_of']))]
|
| 451 |
+
related_in_df = product_kg_df[(product_kg_df['object'] == target_product['product']) & (product_kg_df['propertyLabel'].apply(lambda x: x in ['uses', 'has_part']))]
|
| 452 |
+
|
| 453 |
+
path_df = pd.concat(
|
| 454 |
+
[
|
| 455 |
+
start_df.merge(related_out_df[['product', 'object', 'objectLabel', 'objectLabelJa']], left_on='product_start', right_on='product').drop(columns=['product']).rename(columns={'object': 'product_second', 'objectLabel': 'productLabel_second', 'objectLabelJa': 'productLabelJa_second'}),
|
| 456 |
+
start_df.merge(related_in_df[['object', 'product', 'productLabel', 'productLabelJa']], left_on='product_start', right_on='object').drop(columns=['object']).rename(columns={'product': 'product_second', 'productLabel': 'productLabel_second', 'productLabelJa': 'productLabelJa_second'}),
|
| 457 |
+
]
|
| 458 |
+
)
|
| 459 |
+
|
| 460 |
+
# merge 1
|
| 461 |
+
|
| 462 |
+
### Step 3a ###
|
| 463 |
+
|
| 464 |
+
path_df_1 = path_df.merge(company_product_kg_df[['company', 'companyLabel', 'companyLabelJA', 'product']], left_on='product_second', right_on='product').drop(columns=['product'])
|
| 465 |
+
|
| 466 |
+
### Step 3b ###
|
| 467 |
+
|
| 468 |
+
path_df_2 = path_df.merge(product_instance_df[['object', 'objectLabel', 'objectLabelJa', 'product']], left_on='product_second', right_on='product').drop(columns=['product']).rename(columns={'object': 'product_third', 'objectLabel': 'productLabel_third', 'objectLabelJa': 'productLabelJa_third'})
|
| 469 |
+
path_df_2 = path_df_2.merge(company_product_kg_df[['company', 'companyLabel', 'companyLabelJA', 'product']], left_on='product_third', right_on='product').drop(columns=['product'])
|
| 470 |
+
|
| 471 |
+
### Step 3c ###
|
| 472 |
+
|
| 473 |
+
path_df_3 = path_df.merge(product_instance_df[['object', 'objectLabel', 'objectLabelJa', 'product']], left_on='product_second', right_on='product').drop(columns=['product']).rename(columns={'object': 'product_third', 'objectLabel': 'productLabel_third', 'objectLabelJa': 'productLabelJa_third'})
|
| 474 |
+
path_df_3 = path_df_3.merge(product_instance_df[['product', 'productLabel', 'productLabelJa', 'object']], left_on='product_third', right_on='object').drop(columns=['object']).rename(columns={'product': 'product_fourth', 'productLabel': 'productLabel_fourth', 'productLabelJa': 'productLabelJa_fourth'})
|
| 475 |
+
path_df_3 = path_df_3.merge(company_product_kg_df[['company', 'companyLabel', 'companyLabelJA', 'product']], left_on='product_fourth', right_on='product').drop(columns=['product'])
|
| 476 |
+
|
| 477 |
+
### Step 5 ###
|
| 478 |
+
|
| 479 |
+
path_df_1['length'] = 4
|
| 480 |
+
path_df_2['length'] = 5
|
| 481 |
+
path_df_3['length'] = 6
|
| 482 |
+
|
| 483 |
+
final_path_df = pd.concat([path_df_1, path_df_2, path_df_3])
|
| 484 |
+
|
| 485 |
+
final_path_df = final_path_df.reset_index(drop=True)
|
| 486 |
+
final_path_df['path_id'] = final_path_df.index
|
| 487 |
+
#final_path_df = final_path_df.set_index('path_id', drop=False)
|
| 488 |
+
|
| 489 |
+
final_company_df = final_path_df[['path_id', 'company', 'companyLabel', 'companyLabelJA']].copy()
|
| 490 |
+
|
| 491 |
+
### Step 6 ###
|
| 492 |
+
|
| 493 |
+
st.title(f"Potential Customers for {target_product.companyLabel} for product {target_product.productLabel}")
|
| 494 |
+
|
| 495 |
+
# st.dataframe(final_company_df)
|
| 496 |
+
|
| 497 |
+
st.dataframe(final_company_df, on_select="rerun", key="customer", selection_mode="single-row")
|
| 498 |
+
select_customer = st.session_state.customer
|
| 499 |
+
|
| 500 |
+
if len(select_customer['selection']['rows']) > 0:
|
| 501 |
+
|
| 502 |
+
customer_id = select_customer['selection']['rows'][0]
|
| 503 |
+
target_customer = final_company_df.iloc[customer_id]
|
| 504 |
+
|
| 505 |
+
customer_df = final_path_df[final_path_df.path_id == target_customer.path_id].iloc[0]
|
| 506 |
+
|
| 507 |
+
# import graphviz
|
| 508 |
+
|
| 509 |
+
# Create a graphlib graph object
|
| 510 |
+
graph = graphviz.Digraph()
|
| 511 |
+
|
| 512 |
+
graph.edge(customer_df.companyLabel_start, customer_df.productLabel_start, label=' produces')
|
| 513 |
+
graph.edge(customer_df.productLabel_start, customer_df.productLabel_second, label=' part of')
|
| 514 |
+
|
| 515 |
+
if customer_df.length == 4:
|
| 516 |
+
graph.edge(customer_df.productLabel_second, customer_df.companyLabel, label= ' produced by')
|
| 517 |
+
|
| 518 |
+
elif customer_df.length == 5:
|
| 519 |
+
graph.edge(customer_df.productLabel_second, customer_df.productLabel_third, label=' instance of')
|
| 520 |
+
graph.edge(customer_df.productLabel_third, customer_df.companyLabel, label= ' produced by')
|
| 521 |
+
|
| 522 |
+
if customer_df.length == 6:
|
| 523 |
+
graph.edge(customer_df.productLabel_second, customer_df.productLabel_third, label=' instance of')
|
| 524 |
+
graph.edge(customer_df.productLabel_fourth, customer_df.productLabel_third, ' instance of')
|
| 525 |
+
graph.edge(customer_df.productLabel_fourth, customer_df.companyLabel, label= ' produced by')
|
| 526 |
+
|
| 527 |
+
st.graphviz_chart(graph)
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
|
| 532 |
|
| 533 |
else:
|
| 534 |
|