| import gradio as gr |
| import pandas as pd |
| from sklearn.cluster import KMeans |
| import numpy as np |
| import os |
|
|
| from huggingface_hub import hf_hub_download |
|
|
| |
# --- Dataset bootstrap -----------------------------------------------------
# Reads the Hugging Face token from the environment, downloads the pet-food
# spreadsheet from the Hub and loads it into the module-level `df`, which
# `filter_feed` copies at the start of every request.
#
# NOTE(review): the Korean text in this file appears mis-encoded in SOURCE;
# every literal below is reproduced as found -- verify against the raw file.
hf_token = os.getenv("HF_TOKEN")

if hf_token is None:
    print("ํ ํฐ์ด ์ค์ ๋์ง ์์์ต๋๋ค. Repository Secrets์ HF_TOKEN์ด ์๋์ง ํ์ธํ์ธ์.")
else:
    print("ํ ํฐ์ด ์ค์ ๋์์ต๋๋ค. ํ์ผ์ ๋ค์ด๋ก๋ํฉ๋๋ค.")

# The download is attempted whether or not a token was found, so `df` is
# always defined once this module has been imported (or the import fails
# loudly right here).
# NOTE(review): SOURCE's indentation is lost. If the download originally lived
# inside the `else` branch, a missing token left `df` undefined and every
# later `filter_feed` call failed with NameError.
# NOTE(review): presumably a None token means an unauthenticated request, and
# newer huggingface_hub releases call this argument `token` -- confirm both
# against the pinned library version.
file_path = hf_hub_download(
    repo_id="HCIlab-SMWU/final_petfood_dataset",
    filename="new_data_with_features.xlsx",
    repo_type="dataset",
    use_auth_token=hf_token,
)
print(f"ํ์ผ์ด ๋ค์ด๋ก๋๋์์ต๋๋ค: {file_path}")

df = pd.read_excel(file_path)

# Startup diagnostics: first rows and total row count of the loaded sheet.
print(df.head())

row_count = df.shape[0]
print(f"DataFrame์ ํ ๊ฐ์๋: {row_count}")

# Column names supplied through the environment; `filter_feed` indexes the
# frame with them (`key1` for the health-concern match, `key2` for the
# allergy match and the non-empty check).
key1 = os.getenv("secret1")
key2 = os.getenv("secret2")
|
|
| |
# NOTE(review): the Korean literals below appear mis-encoded in SOURCE and are
# reproduced as found. Where SOURCE shows a literal broken across two lines,
# the break is written as the escape \x85 (U+0085, a character that line
# splitters treat as a newline) -- verify against the raw file.

# Columns shown to the user, in display order.
_DISPLAY_COLUMNS = ['Cleaned_Product_Name', '์ข\x85', '๊ธ์ฌ๋์', '๊ธฐ๋ฅ', '์ฃผ์๋ฃ']

# Age-range choice -> labels accepted in the '๊ธ์ฌ๋์' column.
_AGE_GROUPS = {
    "1์ด ๋ฏธ๋ง": ["ํผํผ", "ํคํผ", "์ ์ฐ๋ น"],
    "1์ด ์ด์, 7์ด ์ดํ": ["์ด๋ํธ", "์ ์ฐ๋ น"],
}
# Used for every other age value (including no selection).
_DEFAULT_AGE_GROUP = ["์๋์ด", "์ ์ฐ๋ น"]

# Health-concern choice -> tags searched for in the `key1` column.
_HEALTH_TAGS = {
    "์น์/๊ตฌ๊ฐ": ["์น์์ ๊ฑฐ", "๊ตฌ๊ฐ๊ด๋ฆฌ"],
    "๋ผ/๊ด์ ": ["๊ด์ ๊ฐํ"],
    "ํผ๋ถ/๋ชจ์ง": ["ํผ๋ชจ๊ด๋ฆฌ"],
    "์๋ฌ์ง": ["์ ์๋ฌ์ง"],
    "๋น๋ง": ["๋ค์ด์ดํธ/์ค์ฑํ", "์ฒด์ค์ ์ง"],
    "๋น๋จ๊ธฐ": ["์ ๋ฆฌ๋๋ฆฌ(๋น๋จ๊ณ)", "๊ฒฐ์์๋ฐฉ", "์ ์ฅ/์๋ก", "์์๋์ฆ์ง"],
    "๋": ["๋๊ฑด๊ฐ"],
    "์ํ๊ธฐ": ["์ํ๊ฐ์ "],
    "ํ๋": ["๋ถ๋ฆฌ๋ถ์ํด์", "์คํธ๋ ์ค์ํ"],
    "์ฌ์ฅ": ["์ฌ์ฅ๊ฑด๊ฐ"],
    "ํธํก๊ธฐ": ["ํธํก๊ธฐ๊ด๋ฆฌ"],
    "๋\x85ธํ": ["ํญ์ฐํ"],
    "ํค์ด๋ณผ": ["ํค์ด๋ณผ"],
}


def _contains_any(series, terms):
    """Return a boolean mask of the rows of *series* containing any of *terms*.

    Every term is matched as a literal substring (``regex=False``), so a term
    holding regex metacharacters -- one of the health tags contains
    parentheses -- matches its own text instead of being parsed as a pattern.
    Missing values never match.
    """
    mask = pd.Series(False, index=series.index)
    for term in terms:
        mask |= series.str.contains(term, regex=False, na=False)
    return mask


def _cluster_representatives(candidates, features, n_clusters=10):
    """Return one row of *candidates* per k-means cluster: the one nearest its centroid.

    *features* must have one row per row of *candidates*, in the same order.
    The returned frame carries two extra columns, ``cluster`` and
    ``distance_to_centroid``.
    """
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    kmeans.fit(features)
    labels = kmeans.labels_
    distances = kmeans.transform(features)
    # `assign` builds a new frame, so nothing is written onto a slice of the
    # caller's data.
    scored = candidates.assign(
        cluster=labels,
        distance_to_centroid=distances[np.arange(len(labels)), labels],
    )
    nearest = scored.groupby('cluster')['distance_to_centroid'].idxmin()
    return scored.loc[nearest]


def filter_feed(age_input, allergies_input, health_concerns_input, pet_type_input, sort_option):
    """Filter the module-level feed table `df` by the user's selections.

    Args:
        age_input: Age-range choice; any value other than the two keys of
            ``_AGE_GROUPS`` selects the default (last) age group.
        allergies_input: Ingredients to exclude; rows whose `key2` column
            contains any of them are dropped. May be empty or None.
        health_concerns_input: Up to three health concerns; a row must carry
            at least one tag of every selected concern in its `key1` column.
            May be empty or None.
        pet_type_input: Pet type to keep; falsy means no restriction.
        sort_option: When equal to the alphabetical option the result is
            sorted by product name; otherwise the order is left as produced.

    Returns:
        A ``(results, error_message)`` tuple. ``results`` is a DataFrame with
        the display columns (product-name column renamed); when more than 10
        rows match, it holds one representative row per k-means cluster.
        ``error_message`` is an empty string unless more than three concerns
        were selected or nothing matched.
    """
    allergies = allergies_input or []
    concerns = health_concerns_input or []

    # Reject an over-long selection before doing any filtering work.
    if len(concerns) > 3:
        return pd.DataFrame(), "๊ฑด๊ฐ ๊ณ ๋ฏผ์ ์ต๋ 3๊ฐ๊น์ง ์ ํ ๊ฐ๋ฅํฉ๋๋ค."

    data = df.copy()
    error_message = ""

    if pet_type_input:
        data = data[data['์ข\x85'] == pet_type_input]

    age_filter = _AGE_GROUPS.get(age_input, _DEFAULT_AGE_GROUP)
    data = data[_contains_any(data['๊ธ์ฌ๋์'], age_filter)]

    if allergies:
        data = data[~_contains_any(data[key2], allergies)]

    # Rows without any `key2` content are never recommended.
    data = data[data[key2].notna() & (data[key2] != "")]

    # Concerns are AND-ed together; the tags within one concern are OR-ed.
    for concern in concerns:
        tags = _HEALTH_TAGS.get(concern, [])
        if tags:
            data = data[_contains_any(data[key1], tags)]

    if len(data) > 10:
        # One-hot encode the '|'-separated values of both key columns. They
        # are read from the full frame, so this does not depend on `key1` /
        # `key2` being among the display columns.
        ingredient_flags = pd.get_dummies(data[key2].str.split('|').explode()).groupby(level=0).max()
        feature_flags = pd.get_dummies(data[key1].str.split('|').explode()).groupby(level=0).max()
        features = pd.concat([ingredient_flags, feature_flags], axis=1)
        results = _cluster_representatives(data[_DISPLAY_COLUMNS], features)[_DISPLAY_COLUMNS]
    else:
        results = data[_DISPLAY_COLUMNS]

    # The requested ordering applies whether or not clustering ran.
    if sort_option == "๊ฐ๋๋ค์":
        results = results.sort_values(by=['Cleaned_Product_Name']).reset_index(drop=True)

    results = results.rename(columns={'Cleaned_Product_Name': '์ฌ๋ฃ์ด๋ฆ'})

    if results.empty:
        error_message = "์๋ฌ์ง์ ๊ฑด๊ฐ๊ณ ๋ฏผ ์กฐ๊ฑด์ ๋ชจ๋ ๋ง์กฑํ๋ ์ฌ๋ฃ๊ฐ ์์ต๋๋ค."

    return results, error_message
|
|
| |
# NOTE(review): the Korean literals below appear mis-encoded in SOURCE and are
# reproduced as found (several choice labels look identical here for that
# reason). Where SOURCE shows a literal broken across two lines, the break is
# written as the escape \x85 (U+0085) -- verify against the raw file.


def check_health_concerns(health_concerns_input):
    """Return the over-limit warning when more than three concerns are selected, else ""."""
    over_limit = len(health_concerns_input) > 3
    return "๊ฑด๊ฐ ๊ณ ๋ฏผ์ ์ต๋ 3๊ฐ๊น์ง ์ ํ ๊ฐ๋ฅํฉ๋๋ค." if over_limit else ""


# Page layout: the selection widgets, a submit button, the result table and a
# read-only message box, created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# ๋ฐ๋ ค๋๋ฌผ ์ฌ๋ฃ ์ถ์ฒ")

    age_choices = ["1์ด ๋ฏธ๋ง", "1์ด ์ด์, 7์ด ์ดํ", "7์ด ์ด์"]
    age_input = gr.Dropdown(age_choices, label="์ฐ๋ น๋")

    allergy_choices = ["์", "๋ผ์ง", "๋ญ", "์ค๋ฆฌ", "์", "์น ๋ฉด์กฐ", "์์ /ํด์ฐ๋ฌผ", "์ฌ์ด", "์ฐ์ด", "์น์ฆ/์ ์ง๋ฐฉ", "์ฐธ์น", "๋ฐ", "์","๊ณ ๊ตฌ๋ง","๊ณก๋ฌผ","๊ณค์ถฉ","๊ณผ์ผ/์ผ์ฑ","๋ถ์ด","์ฒญ์ด"]
    allergies_input = gr.CheckboxGroup(allergy_choices, label="์ฃผ์๋ฃ ์๋ฌ์ง")

    concern_choices = ["์น์/๊ตฌ๊ฐ", "๋ผ/๊ด์ ", "ํผ๋ถ/๋ชจ์ง", "์๋ฌ์ง", "๋น๋ง", "๋น๋จ๊ธฐ", "๋", "์ํ๊ธฐ", "ํ๋", "์ฌ์ฅ", "ํธํก๊ธฐ", "๋\x85ธํ", "ํค์ด๋ณผ"]
    health_concerns_input = gr.CheckboxGroup(
        concern_choices,
        label="๊ฑด๊ฐ ๊ณ ๋ฏผ (์ต๋ 3๊ฐ ์ ํ ๊ฐ๋ฅ)",
    )

    pet_type_input = gr.Dropdown(["๊ฐ์์ง", "๊ณ ์์ด"], label="๋ฐ๋ ค๋๋ฌผ ์ข\x85๋ฅ")
    sort_option = gr.Radio(["์ถ์ฒ์", "๊ฐ๋๋ค์"], label="์ ๋ ฌ ๋ฐฉ์", value="์ถ์ฒ์")
    submit_button = gr.Button("์ถ์ฒ ์ฌ๋ฃ ๋ณด๊ธฐ")
    output = gr.Dataframe()
    error_output = gr.Textbox(label="์๋ฌ ๋ฉ์์ง", interactive=False)

    # Changing the sort order and pressing the button both rerun the filter
    # with the same inputs and fill the same two outputs.
    feed_inputs = [age_input, allergies_input, health_concerns_input, pet_type_input, sort_option]
    feed_outputs = [output, error_output]
    for register in (sort_option.change, submit_button.click):
        register(fn=filter_feed, inputs=feed_inputs, outputs=feed_outputs)

    # Show the over-limit warning as soon as the concern selection changes.
    health_concerns_input.change(
        fn=check_health_concerns,
        inputs=health_concerns_input,
        outputs=error_output,
    )


demo.launch(share=True)