Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import os | |
| import time | |
| import threading | |
| import tempfile | |
| import logging | |
| import random | |
| import uuid | |
| import shutil | |
| import glob | |
| from datetime import datetime | |
| import sys | |
| import types | |
# Logging configuration
# Configure the root logger once at import time: records go to the console
# stream and are appended to a log file in the current working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        # Explicit UTF-8: this application logs non-ASCII text and emoji, and
        # the platform default encoding is not guaranteed to represent them.
        logging.FileHandler('main_keyword_app.log', mode='a', encoding='utf-8'),
    ],
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# Load module source code from environment variables and build the modules dynamically
def load_module_from_env(module_name, env_var_name):
    """Build a module object from Python source held in an environment variable.

    The source text is read from ``env_var_name`` and executed inside a fresh
    ``types.ModuleType`` namespace that has been pre-seeded with a fixed set
    of modules and helpers. The finished module is registered in
    ``sys.modules`` under ``module_name``.

    Args:
        module_name: Name of the new module and its ``sys.modules`` key.
        env_var_name: Environment variable that holds the module source.

    Returns:
        The populated module object.

    Raises:
        ValueError: If the environment variable is unset or empty.
        Exception: Any error raised while seeding the namespace or executing
            the source is logged and re-raised unchanged.
    """
    # Modules injected into the new namespace under their own names.
    seeded_module_names = (
        'os', 'time', 'logging', 'pandas', 'requests', 'tempfile',
        'threading', 're', 'random', 'uuid', 'shutil', 'glob', 'datetime',
        'types', 'collections', 'hmac', 'hashlib', 'base64',
    )
    try:
        source_text = os.getenv(env_var_name)
        if not source_text:
            raise ValueError(f"νκ²½λ³μ {env_var_name}κ° μ€μ λμ§ μμμ΅λλ€.")

        # Resolve every seed before touching the module so a failed import
        # leaves nothing half-populated.
        seeds = {name: __import__(name) for name in seeded_module_names}
        collections_module = __import__('collections')
        seeds['Counter'] = collections_module.Counter
        seeds['defaultdict'] = collections_module.defaultdict

        new_module = types.ModuleType(module_name)
        new_module.__dict__.update(seeds)

        # NOTE(review): this executes arbitrary code taken from the process
        # environment. Only safe while that environment is fully trusted.
        exec(source_text, new_module.__dict__)

        # Registration happens only after the source ran successfully.
        sys.modules[module_name] = new_module
        logger.info(f"β λͺ¨λ {module_name} λ‘λ μλ£")
        return new_module
    except Exception as e:
        logger.error(f"β λͺ¨λ {module_name} λ‘λ μ€ν¨: {e}")
        raise
# Load the required modules from environment variables
logger.info("π λͺ¨λ λ‘λ μμ...")
try:
    # Modules are loaded before the modules they are injected into.
    api_utils = load_module_from_env('api_utils', 'API_UTILS_CODE')
    text_utils = load_module_from_env('text_utils', 'TEXT_UTILS_CODE')
    keyword_search = load_module_from_env('keyword_search', 'KEYWORD_SEARCH_CODE')

    # product_search receives api_utils and text_utils as attributes.
    product_search_module = load_module_from_env('product_search', 'PRODUCT_SEARCH_CODE')
    for dep_name, dep_module in (('api_utils', api_utils), ('text_utils', text_utils)):
        setattr(product_search_module, dep_name, dep_module)
    product_search = product_search_module

    # keyword_processor receives text_utils, keyword_search and product_search.
    keyword_processor_module = load_module_from_env('keyword_processor', 'KEYWORD_PROCESSOR_CODE')
    for dep_name, dep_module in (
        ('text_utils', text_utils),
        ('keyword_search', keyword_search),
        ('product_search', product_search),
    ):
        setattr(keyword_processor_module, dep_name, dep_module)
    keyword_processor = keyword_processor_module

    export_utils = load_module_from_env('export_utils', 'EXPORT_UTILS_CODE')

    # category_analysis receives text_utils, product_search and keyword_search.
    category_analysis_module = load_module_from_env('category_analysis', 'CATEGORY_ANALYSIS_CODE')
    for dep_name, dep_module in (
        ('text_utils', text_utils),
        ('product_search', product_search),
        ('keyword_search', keyword_search),
    ):
        setattr(category_analysis_module, dep_name, dep_module)
    category_analysis = category_analysis_module

    logger.info("β λͺ¨λ λͺ¨λ λ‘λ μλ£")
except Exception as e:
    # Startup cannot continue without these modules: list every expected
    # environment variable for the operator, then re-raise.
    logger.error(f"β λͺ¨λ λ‘λ μ€ μΉλͺ μ μ€λ₯: {e}")
    logger.error("νμν νκ²½λ³μλ€μ΄ μ€μ λμλμ§ νμΈνμΈμ:")
    for required_env_var in (
        'API_UTILS_CODE',
        'TEXT_UTILS_CODE',
        'KEYWORD_SEARCH_CODE',
        'PRODUCT_SEARCH_CODE',
        'KEYWORD_PROCESSOR_CODE',
        'EXPORT_UTILS_CODE',
        'CATEGORY_ANALYSIS_CODE',
    ):
        logger.error(f"- {required_env_var}")
    raise
# Dictionaries for per-session temporary file management
# session_temp_files: session id -> list of temp file paths registered for it.
session_temp_files = dict()
# session_data: session id -> dict of session metadata ('last_activity' timestamp).
session_data = dict()
def cleanup_huggingface_temp_folders():
    """Best-effort removal of stale temp spreadsheets and the Gradio example cache.

    Scans a fixed list of temp directories for ``.xlsx``/``.csv`` files whose
    names match the session, keyword or ``tmp`` patterns, and deletes those
    last modified more than one hour ago. Afterwards the
    ``gradio_cached_examples`` folder under the current working directory is
    removed. Every failure is logged and swallowed; this function never raises.
    """
    stale_patterns = (
        "session_*.xlsx", "session_*.csv",
        "*keyword*.xlsx", "*keyword*.csv",
        "tmp*.xlsx", "tmp*.csv",
    )
    try:
        candidate_dirs = [
            tempfile.gettempdir(),
            "/tmp",
            "/var/tmp",
            os.path.join(os.getcwd(), "temp"),
            os.path.join(os.getcwd(), "tmp"),
            "/gradio_cached_examples",
            "/flagged"
        ]
        # The system temp dir is frequently "/tmp" itself; drop duplicates
        # (order preserved) so no directory is scanned twice.
        temp_dirs = list(dict.fromkeys(os.path.normpath(d) for d in candidate_dirs))
        cleanup_count = 0
        for temp_dir in temp_dirs:
            if not os.path.exists(temp_dir):
                continue
            try:
                # A file can match several patterns (e.g. "session_keyword.xlsx").
                # De-duplicate so it is not processed again after deletion,
                # which would only produce a spurious "delete failed" warning.
                session_files = dict.fromkeys(
                    path
                    for pattern in stale_patterns
                    for path in glob.glob(os.path.join(temp_dir, pattern))
                )
                for file_path in session_files:
                    try:
                        # Only delete files older than one hour.
                        if os.path.getmtime(file_path) < time.time() - 3600:
                            os.remove(file_path)
                            cleanup_count += 1
                            logger.info(f"μ΄κΈ° μ 리: μ€λλ μμ νμΌ μμ - {file_path}")
                    except Exception as e:
                        logger.warning(f"νμΌ μμ μ€ν¨ (무μλ¨): {file_path} - {e}")
            except Exception as e:
                logger.warning(f"μμ λλ ν 리 μ 리 μ€ν¨ (무μλ¨): {temp_dir} - {e}")
        logger.info(f"β νκΉ νμ΄μ€ μμ ν΄λ μ΄κΈ° μ 리 μλ£ - {cleanup_count}κ° νμΌ μμ ")
        # Also clear the Gradio example cache folder.
        try:
            gradio_temp_dir = os.path.join(os.getcwd(), "gradio_cached_examples")
            if os.path.exists(gradio_temp_dir):
                shutil.rmtree(gradio_temp_dir, ignore_errors=True)
                logger.info("Gradio μΊμ ν΄λ μ 리 μλ£")
        except Exception as e:
            logger.warning(f"Gradio μΊμ ν΄λ μ 리 μ€ν¨ (무μλ¨): {e}")
    except Exception as e:
        logger.error(f"μ΄κΈ° μμ ν΄λ μ 리 μ€ μ€λ₯ (κ³μ μ§ν): {e}")
def setup_clean_temp_environment():
    """Prepare an empty, application-private temp directory.

    Runs the stale-file cleanup, recreates ``<system temp>/keyword_app`` from
    scratch, and publishes its path through the ``KEYWORD_APP_TEMP``
    environment variable.

    Returns:
        The dedicated directory path, or the system temp directory when any
        step fails (the failure is logged, not raised).
    """
    try:
        # Step 1: remove leftovers from earlier runs.
        cleanup_huggingface_temp_folders()

        # Step 2: start from an empty dedicated directory.
        dedicated_dir = os.path.join(tempfile.gettempdir(), "keyword_app")
        if os.path.exists(dedicated_dir):
            shutil.rmtree(dedicated_dir, ignore_errors=True)
        os.makedirs(dedicated_dir, exist_ok=True)

        # Step 3: expose the location to the rest of the app.
        os.environ['KEYWORD_APP_TEMP'] = dedicated_dir
        logger.info(f"β μ ν리μΌμ΄μ μ μ© μμ λλ ν 리 μ€μ : {dedicated_dir}")
        return dedicated_dir
    except Exception as e:
        logger.error(f"μμ νκ²½ μ€μ μ€ν¨: {e}")
        return tempfile.gettempdir()
def get_app_temp_dir():
    """Return the application temp directory.

    Reads the ``KEYWORD_APP_TEMP`` environment variable and falls back to the
    system temp directory when the variable is not set.
    """
    fallback_dir = tempfile.gettempdir()
    configured_dir = os.environ.get('KEYWORD_APP_TEMP')
    if configured_dir is None:
        return fallback_dir
    return configured_dir
def get_session_id():
    """Return a new random (version 4) UUID as a string, used as a session id."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
def cleanup_session_files(session_id, delay=300):
    """Schedule deletion of a session's registered temp files.

    Starts a daemon thread that sleeps for ``delay`` seconds, removes the
    session's entry from ``session_temp_files`` and deletes every file that
    was listed there. Deletion errors are logged, never raised.

    Args:
        session_id: Session whose files should be removed.
        delay: Seconds to wait before deleting (default 300).
    """
    def cleanup():
        time.sleep(delay)
        # pop() takes the entry out in a single step. The previous
        # "in" check followed by "del" could raise KeyError if another
        # thread removed the same session in between.
        files_to_remove = session_temp_files.pop(session_id, None)
        if files_to_remove is None:
            return
        for file_path in files_to_remove:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"μΈμ {session_id[:8]}... μμ νμΌ μμ : {file_path}")
            except Exception as e:
                logger.error(f"μΈμ {session_id[:8]}... νμΌ μμ μ€λ₯: {e}")

    threading.Thread(target=cleanup, daemon=True).start()
def register_session_file(session_id, file_path):
    """Record ``file_path`` in the list of temp files owned by ``session_id``.

    The session's list is created on first use.
    """
    owned_files = session_temp_files.setdefault(session_id, [])
    owned_files.append(file_path)
def cleanup_old_sessions():
    """Drop sessions that have been idle for more than one hour.

    For each expired session, its registered temp files are deleted from disk
    and its entries are removed from ``session_temp_files`` and
    ``session_data``. File deletion errors are logged, never raised.
    """
    current_time = time.time()
    # Iterate over a snapshot: this runs on the scheduler thread while request
    # handlers add sessions, and iterating the live dict can raise
    # "RuntimeError: dictionary changed size during iteration".
    sessions_to_remove = [
        session_id
        for session_id, data in list(session_data.items())
        if current_time - data.get('last_activity', 0) > 3600  # idle > 1 hour
    ]
    for session_id in sessions_to_remove:
        # Delete the session's files; pop() tolerates a session with no files
        # or one already removed by another code path.
        for file_path in session_temp_files.pop(session_id, ()):
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"μ€λλ μΈμ {session_id[:8]}... νμΌ μμ : {file_path}")
            except Exception as e:
                logger.error(f"μ€λλ μΈμ νμΌ μμ μ€λ₯: {e}")
        # Forget the session metadata.
        if session_data.pop(session_id, None) is not None:
            logger.info(f"μ€λλ μΈμ λ°μ΄ν° μμ : {session_id[:8]}...")
def update_session_activity(session_id):
    """Stamp ``session_id`` with the current time as its last activity.

    The session's metadata dict is created on first use.
    """
    metadata = session_data.setdefault(session_id, {})
    metadata['last_activity'] = time.time()
def create_session_temp_file(session_id, suffix='.xlsx'):
    """Create an empty temp file for a session and register it.

    The file is placed in the application temp directory and named
    ``session_<first 8 chars of id>_<timestamp>_<4-digit random><suffix>``.

    Args:
        session_id: Owning session; only its first 8 characters are used in
            the file name.
        suffix: File extension including the dot (default ``'.xlsx'``).

    Returns:
        The full path of the newly created file.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    nonce = random.randint(1000, 9999)
    base_dir = get_app_temp_dir()
    temp_file_path = os.path.join(
        base_dir,
        f"session_{session_id[:8]}_{stamp}_{nonce}{suffix}",
    )
    # Opening in write mode and closing immediately leaves an empty file.
    with open(temp_file_path, 'w'):
        pass
    register_session_file(session_id, temp_file_path)
    return temp_file_path
def wrapper_modified(keyword, korean_only, apply_main_keyword_option, exclude_zero_volume, session_id):
    """Run a keyword search and build the values for the result widgets.

    Args:
        keyword: Main keyword entered by the user.
        korean_only: Forwarded to the product search.
        apply_main_keyword_option: Radio selection; compared against the
            "apply main keyword" label to produce a boolean for the search.
        exclude_zero_volume: Forwarded to the search-result processor.
        session_id: Session that owns the generated Excel file.

    Returns:
        A 9-tuple: table HTML update, category-filter choices update,
        volume-range choices update, keywords DataFrame (or ``None``),
        analysis-category update, Excel file path (or ``None``), two
        visibility updates, and the keyword that was searched.
    """
    update_session_activity(session_id)
    current_keyword = keyword

    def _no_result(table_html, keywords_df=None):
        # Shared shape of the three "nothing to show" responses.
        return (
            gr.update(value=table_html),
            gr.update(choices=["μ 체 보기"]),
            gr.update(choices=["μ 체"]),
            keywords_df,
            gr.update(choices=["μ 체 보기"], value="μ 체 보기"),
            None,
            gr.update(visible=False),
            gr.update(visible=False),
            current_keyword,
        )

    # Nothing typed: blank table.
    if not keyword:
        return _no_result("")

    # Query the shopping search API.
    use_main_keyword = apply_main_keyword_option == "λ©μΈν€μλ μ μ©"
    search_results = product_search.fetch_naver_shopping_data(keyword, korean_only, use_main_keyword)
    if not search_results.get("product_list"):
        return _no_result("<p>κ²μ κ²°κ³Όκ° μμ΅λλ€. λ€λ₯Έ ν€μλλ‘ μλν΄λ³΄μΈμ.</p>")

    # Turn the raw results into product / keyword tables and categories.
    result = keyword_processor.process_search_results(search_results, current_keyword, exclude_zero_volume)
    df_products = result["products_df"]
    df_keywords = result["keywords_df"]
    category_list = result["categories"]
    if df_keywords.empty:
        return _no_result(
            "<p>μΆμΆλ ν€μλκ° μμ΅λλ€. λ€λ₯Έ μ΅μ μΌλ‘ μλν΄λ³΄μΈμ.</p>",
            df_keywords,
        )

    table_html = export_utils.create_table_without_checkboxes(df_keywords)
    # Distinct volume ranges, with the "all" entry first.
    distinct_ranges = df_keywords["κ²μλꡬκ°"].unique().tolist()
    volume_range_choices = ["μ 체"] + sorted(distinct_ranges)
    # Preselect the first category when there is one.
    if category_list:
        first_category = category_list[0]
    else:
        first_category = "μ 체 보기"
    excel_path = create_session_excel_file(df_keywords, session_id)

    return (
        gr.update(value=table_html),
        gr.update(choices=category_list),
        gr.update(choices=volume_range_choices),
        df_keywords,
        gr.update(choices=category_list, value=first_category),
        excel_path,
        gr.update(visible=True),
        gr.update(visible=True),
        current_keyword,
    )
def create_session_excel_file(df, session_id):
    """Write ``df`` to a new session-owned ``.xlsx`` file.

    Args:
        df: DataFrame to export (written without the index, via openpyxl).
        session_id: Session that owns the file.

    Returns:
        The path of the written file, or ``None`` if anything fails (the
        error is logged).
    """
    try:
        workbook_path = create_session_temp_file(session_id, '.xlsx')
        df.to_excel(workbook_path, index=False, engine='openpyxl')
        logger.info(f"μΈμ {session_id[:8]}... μμ νμΌ μμ±: {workbook_path}")
        return workbook_path
    except Exception as e:
        logger.error(f"μΈμ λ³ μμ νμΌ μμ± μ€λ₯: {e}")
        return None
def analyze_with_auto_download(analysis_keywords, selected_category, state_df, session_id):
    """Run the category-match analysis and prepare a downloadable Excel file.

    Args:
        analysis_keywords: Keywords text entered for analysis.
        selected_category: Category to analyze against.
        state_df: Keywords DataFrame kept in the UI state.
        session_id: Session that owns the generated file.

    Returns:
        A 3-tuple: analysis result (or a prompt message when an input is
        missing), Excel path (or ``None``), and a visibility update for the
        analysis output section.
    """
    update_session_activity(session_id)

    # Both inputs are required.
    if not (analysis_keywords and selected_category):
        return "ν€μλμ μΉ΄ν κ³ λ¦¬λ₯Ό λͺ¨λ μ νν΄μ£ΌμΈμ.", None, gr.update(visible=False)

    # Delegates to the dynamically loaded category_analysis module.
    analysis_result = category_analysis.analyze_keywords_by_category(
        analysis_keywords, selected_category, state_df
    )
    download_path = create_session_excel_file(state_df, session_id)
    return analysis_result, download_path, gr.update(visible=True)
def filter_and_sort_table(df, selected_cat, keyword_sort, total_volume_sort, usage_count_sort, selected_volume_range, exclude_zero_volume, session_id):
    """Filter and sort the keyword table, returning it as HTML.

    Args:
        df: Keywords DataFrame, or ``None``.
        selected_cat: Category filter selection; the "show all" label or an
            empty value disables the filter. A trailing `` (...)`` part is
            stripped before matching.
        keyword_sort: Sort choice for the keyword column.
        total_volume_sort: Sort choice for the total search volume column.
        usage_count_sort: Sort choice for the usage count column.
        selected_volume_range: Volume-range filter; the "all" label or an
            empty value disables it.
        exclude_zero_volume: When true, rows whose total volume is not
            greater than 0 are dropped.
        session_id: Session id, used for activity tracking and logging.

    Returns:
        HTML for the filtered table, or ``""`` when ``df`` is ``None`` or
        empty.
    """
    update_session_activity(session_id)
    if df is None or df.empty:
        return ""
    filtered_df = df.copy()

    # Category filter.
    if selected_cat and selected_cat != "μ 체 보기":
        cat_name_to_filter = selected_cat.rsplit(" (", 1)[0]
        # regex=False: category names are literal text. With the default
        # regex matching, names containing characters such as "(" or "+"
        # are mis-matched or raise, and disagree with the plain substring
        # test used in the helper below.
        category_mask = filtered_df["κ΄λ ¨ μΉ΄ν κ³ λ¦¬"].astype(str).str.contains(
            cat_name_to_filter, case=False, na=False, regex=False
        )
        # .copy() so the column assignment below acts on an independent frame
        # instead of a filtered view (avoids SettingWithCopyWarning).
        filtered_df = filtered_df[category_mask].copy()

        def get_filtered_category_display(current_categories_str):
            # Keep only the newline-separated entries that mention the
            # selected category; fall back to the full text if none match.
            if pd.isna(current_categories_str):
                return ""
            categories = str(current_categories_str).split('\n')
            matched_categories = [cat for cat in categories if cat_name_to_filter.lower() in cat.lower()]
            if matched_categories:
                return "\n".join(matched_categories)
            return current_categories_str

        filtered_df['κ΄λ ¨ μΉ΄ν κ³ λ¦¬'] = filtered_df['κ΄λ ¨ μΉ΄ν κ³ λ¦¬'].apply(get_filtered_category_display)

    # Search-volume range filter.
    if selected_volume_range and selected_volume_range != "μ 체":
        filtered_df = filtered_df[filtered_df["κ²μλꡬκ°"] == selected_volume_range]

    # Zero-volume exclusion.
    if exclude_zero_volume:
        filtered_df = filtered_df[filtered_df["μ΄κ²μλ"] > 0]
        logger.info(f"μΈμ {session_id[:8]}... κ²μλ 0 μ μΈ νν° μ μ© - λ¨μ ν€μλ μ: {len(filtered_df)}")

    # Sorts are applied one after another in this order, so a later active
    # sort determines the final ordering.
    sort_requests = (
        (keyword_sort, "μ‘°ν© ν€μλ"),
        (total_volume_sort, "μ΄κ²μλ"),
        (usage_count_sort, "ν€μλ μ¬μ©νμ"),
    )
    for sort_choice, sort_column in sort_requests:
        if sort_choice != "μ λ ¬ μμ":
            is_ascending = sort_choice == "μ€λ¦μ°¨μ"
            filtered_df = filtered_df.sort_values(by=sort_column, ascending=is_ascending)

    # Reset the index so downstream row numbering is sequential.
    filtered_df = filtered_df.reset_index(drop=True)
    html = export_utils.create_table_without_checkboxes(filtered_df)
    return html
def update_category_selection(selected_cat, session_id):
    """Mirror the category-filter choice into the analysis-category dropdown.

    Returns:
        A Gradio update that sets the target component's value to
        ``selected_cat``.
    """
    update_session_activity(session_id)
    logger.debug(f"μΈμ {session_id[:8]}... μΉ΄ν κ³ λ¦¬ μ ν λ³κ²½: {selected_cat}")
    mirrored_value = gr.update(value=selected_cat)
    return mirrored_value
def reset_interface(session_id):
    """Delete the session's temp files and return default values for the UI.

    Args:
        session_id: Session whose registered files are removed. Its file
            list is emptied but the session entry itself is kept.

    Returns:
        A 20-tuple of default values, in the order expected by the reset
        button's output list.
    """
    update_session_activity(session_id)

    # Remove files registered for this session, if it has any entry.
    registered_files = session_temp_files.get(session_id)
    if registered_files is not None:
        for file_path in registered_files:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"μΈμ {session_id[:8]}... 리μ μ νμΌ μμ : {file_path}")
            except Exception as e:
                logger.error(f"μΈμ {session_id[:8]}... 리μ μ νμΌ μμ μ€λ₯: {e}")
        session_temp_files[session_id] = []

    defaults = (
        "",                         # search keyword
        True,                       # Korean-only extraction
        False,                      # exclude zero-volume keywords
        "λ©μΈν€μλ μ μ©",            # combination mode
        "",                         # HTML table
        ["μ 체 보기"],               # category filter (list form)
        "μ 체 보기",                 # category filter selection
        ["μ 체"],                    # volume-range filter (list form)
        "μ 체",                      # volume-range selection
        "μ λ ¬ μμ",                 # total-volume sort
        "μ λ ¬ μμ",                 # usage-count sort
        None,                       # DataFrame state
        ["μ 체 보기"],               # analysis category (list form)
        "μ 체 보기",                 # analysis category selection
        "",                         # analysis keywords input
        "",                         # analysis result
        None,                       # download file
        gr.update(visible=False),   # keyword analysis section
        gr.update(visible=False),   # analysis output section
        "",                         # keyword state
    )
    return defaults
# Wrapper functions, also taking the session ID
def search_with_loading(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id):
    """First step of a search click: show the progress panel, hide the placeholder.

    Only ``session_id`` is used; the other parameters match the event's
    input list.

    Returns:
        Two visibility updates: visible, then hidden.
    """
    update_session_activity(session_id)
    show_progress = gr.update(visible=True)
    hide_placeholder = gr.update(visible=False)
    return (show_progress, hide_placeholder)
def process_search_results(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id):
    """Second step of a search click: run the search and map it onto the UI.

    Calls ``wrapper_modified`` and derives section visibility from whether
    the returned keywords DataFrame has rows.

    Returns:
        A 12-tuple: table HTML, category choices, volume choices, DataFrame,
        selected category, Excel path, keyword-section visibility,
        category-analysis-section visibility, progress-section visibility
        (always hidden), empty-placeholder visibility, execution-section
        visibility, and the new keyword state.
    """
    update_session_activity(session_id)
    result = wrapper_modified(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id)
    # The 7th element is unpacked but unused; keyword-section visibility is
    # derived from the DataFrame below.
    (table_html, cat_choices, vol_choices, df, selected_cat, excel,
     _keyword_section_vis, cat_section_vis, new_keyword_state) = result

    has_rows = df is not None and not df.empty

    return (
        table_html, cat_choices, vol_choices, df, selected_cat, excel,
        gr.update(visible=has_rows),
        # cat_section_vis is already a visibility update. Wrapping it again
        # as gr.update(visible=cat_section_vis) passed an always-truthy
        # object as the flag, so "hidden" could never be expressed.
        cat_section_vis,
        gr.update(visible=False),
        gr.update(visible=not has_rows),
        gr.update(visible=has_rows),
        new_keyword_state
    )
def analyze_with_loading(analysis_keywords, selected_category, state_df, session_id):
    """First step of an analyze click: show the progress panel.

    Only ``session_id`` is used; the other parameters match the event's
    input list.
    """
    update_session_activity(session_id)
    show_progress = gr.update(visible=True)
    return show_progress
def process_analyze_results(analysis_keywords, selected_category, state_df, session_id):
    """Second step of an analyze click: run the analysis and hide the progress panel.

    Returns:
        The 3-tuple from ``analyze_with_auto_download`` followed by a
        visibility update that hides the progress section.
    """
    update_session_activity(session_id)
    analysis_outputs = analyze_with_auto_download(
        analysis_keywords, selected_category, state_df, session_id
    )
    hide_progress = (gr.update(visible=False),)
    return analysis_outputs + hide_progress
# Session cleanup scheduler
def start_session_cleanup_scheduler():
    """Start a daemon thread that runs the cleanup routines every 10 minutes.

    Each cycle runs ``cleanup_old_sessions`` and then
    ``cleanup_huggingface_temp_folders``. A failure in either routine is
    logged and the loop keeps running; previously one uncaught exception
    ended the thread and silently stopped all further cleanup.
    """
    def cleanup_scheduler():
        while True:
            time.sleep(600)  # run every 10 minutes
            for cleanup_step in (cleanup_old_sessions, cleanup_huggingface_temp_folders):
                try:
                    cleanup_step()
                except Exception:
                    # Keep the scheduler alive; the next cycle retries.
                    logger.exception(
                        "Scheduled cleanup step %s failed; continuing",
                        cleanup_step.__name__,
                    )

    threading.Thread(target=cleanup_scheduler, daemon=True).start()
def cleanup_on_startup():
    """Run the full startup cleanup and reset in-memory session state.

    Returns:
        The temp directory the application should use, as returned by
        ``setup_clean_temp_environment``.
    """
    logger.info("π§Ή μ ν리μΌμ΄μ μμ - μ΄κΈ° μ 리 μμ μμ...")
    # setup_clean_temp_environment() runs cleanup_huggingface_temp_folders()
    # as its first step, so the extra explicit call that used to precede it
    # only scanned the same directories a second time.
    app_temp_dir = setup_clean_temp_environment()
    # The registries are cleared in place, so no `global` statement is needed.
    session_temp_files.clear()
    session_data.clear()
    logger.info(f"β μ΄κΈ° μ 리 μμ μλ£ - μ± μ μ© λλ ν 리: {app_temp_dir}")
    return app_temp_dir
# Build the Gradio interface
def create_app():
    """Build the Gradio Blocks UI and wire its event handlers.

    Returns:
        The ``gr.Blocks`` application, not yet launched.

    NOTE(review): the indentation of this function was not recoverable from
    the source, so the nesting of the layout containers below is a
    reconstruction - confirm against the running UI. Multi-line HTML/CSS
    string contents are kept flush-left exactly as they appear in the source.
    """
    fontawesome_html = """
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/orioncactus/pretendard/dist/web/static/pretendard.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700&display=swap">
"""
    # Load the stylesheet; fall back to the built-in CSS when the file is
    # missing or unreadable. Catching specific exceptions (instead of a bare
    # `except:`) lets KeyboardInterrupt/SystemExit and real bugs propagate.
    try:
        with open('style.css', 'r', encoding='utf-8') as f:
            custom_css = f.read()
    except (OSError, UnicodeDecodeError):
        custom_css = """
:root {
--primary-color: #FB7F0D;
--secondary-color: #ff9a8b;
}
.custom-button {
background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)) !important;
color: white !important;
border-radius: 30px !important;
height: 45px !important;
font-size: 16px !important;
font-weight: bold !important;
width: 100% !important;
text-align: center !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.reset-button {
background: linear-gradient(135deg, #6c757d, #495057) !important;
color: white !important;
border-radius: 30px !important;
height: 45px !important;
font-size: 16px !important;
font-weight: bold !important;
width: 100% !important;
text-align: center !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.section-title {
border-bottom: 2px solid #FB7F0D;
font-weight: bold;
padding-bottom: 5px;
margin-bottom: 15px;
}
.loading-indicator {
display: flex;
align-items: center;
justify-content: center;
padding: 15px;
background-color: #f8f9fa;
border-radius: 5px;
margin: 10px 0;
border: 1px solid #ddd;
}
.loading-spinner {
border: 4px solid rgba(0, 0, 0, 0.1);
width: 24px;
height: 24px;
border-radius: 50%;
border-left-color: #FB7F0D;
animation: spin 1s linear infinite;
margin-right: 10px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.progress-bar {
height: 10px;
background-color: #FB7F0D;
border-radius: 5px;
width: 0%;
animation: progressAnim 2s ease-in-out infinite;
}
@keyframes progressAnim {
0% { width: 10%; }
50% { width: 70%; }
100% { width: 10%; }
}
.empty-table {
width: 100%;
border-collapse: collapse;
font-size: 14px;
margin-top: 20px;
}
.empty-table th {
background-color: #FB7F0D;
color: white;
text-align: left;
padding: 12px;
border: 1px solid #ddd;
}
.empty-table td {
padding: 10px;
border: 1px solid #ddd;
text-align: center;
color: #999;
}
.button-container {
margin-top: 20px;
display: flex;
gap: 15px;
}
.execution-section {
margin-top: 20px;
background-color: #f9f9f9;
border-radius: 8px;
padding: 15px;
border: 1px solid #e5e5e5;
}
.session-info {
background-color: #e8f4f8;
padding: 8px 12px;
border-radius: 4px;
font-size: 12px;
color: #0c5460;
margin-bottom: 10px;
text-align: center;
}
"""
    with gr.Blocks(css=custom_css, theme=gr.themes.Default(
        primary_hue="orange",
        secondary_hue="orange",
        font=[gr.themes.GoogleFont("Noto Sans KR"), "ui-sans-serif", "system-ui"]
    )) as demo:
        gr.HTML(fontawesome_html)
        # Session id state: the callable is used as the initial value factory.
        session_id = gr.State(get_session_id)
        # Last searched keyword.
        keyword_state = gr.State("")

        # Input section
        with gr.Column(elem_classes="custom-frame fade-in"):
            gr.HTML('<div class="section-title"><i class="fas fa-search"></i> κ²μ μ λ ₯</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    keyword = gr.Textbox(
                        label="λ©μΈ ν€μλ",
                        placeholder="μ: μ€μ§μ΄"
                    )
                with gr.Column(scale=1):
                    search_btn = gr.Button(
                        "λ©μΈν€μλ λΆμ",
                        elem_classes="custom-button"
                    )
            with gr.Accordion("μ΅μ μ€μ ", open=False):
                with gr.Row():
                    with gr.Column(scale=1):
                        korean_only = gr.Checkbox(
                            label="νκΈλ§ μΆμΆ",
                            value=True
                        )
                    with gr.Column(scale=1):
                        exclude_zero_volume = gr.Checkbox(
                            label="κ²μλ 0 ν€μλ μ μΈ",
                            value=False
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        apply_main_keyword = gr.Radio(
                            ["λ©μΈν€μλ μ μ©", "λ©μΈν€μλ λ―Έμ μ©"],
                            label="μ‘°ν© λ°©μ",
                            value="λ©μΈν€μλ μ μ©"
                        )
                    with gr.Column(scale=1):
                        gr.HTML("")

        # Progress section (hidden until a search or analysis starts)
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as progress_section:
            gr.HTML('<div class="section-title"><i class="fas fa-spinner"></i> λΆμ μ§ν μν</div>')
            progress_html = gr.HTML("""
<div style="padding: 15px; background-color: #f9f9f9; border-radius: 5px; margin: 10px 0; border: 1px solid #ddd;">
<div style="margin-bottom: 10px; display: flex; align-items: center;">
<i class="fas fa-spinner fa-spin" style="color: #FB7F0D; margin-right: 10px;"></i>
<span>ν€μλ λ°μ΄ν°λ₯Ό λΆμμ€μ λλ€. μ μλ§ κΈ°λ€λ €μ£ΌμΈμ...</span>
</div>
<div style="background-color: #e9ecef; height: 10px; border-radius: 5px; overflow: hidden;">
<div class="progress-bar"></div>
</div>
</div>
""")

        # Main keyword result section
        with gr.Column(elem_classes="custom-frame fade-in") as main_keyword_section:
            gr.HTML('<div class="section-title"><i class="fas fa-table"></i> λ©μΈν€μλ λΆμ κ²°κ³Ό</div>')
            empty_table_html = gr.HTML("""
<table class="empty-table">
<thead>
<tr>
<th>μλ²</th>
<th>μ‘°ν© ν€μλ</th>
<th>PCκ²μλ</th>
<th>λͺ¨λ°μΌκ²μλ</th>
<th>μ΄κ²μλ</th>
<th>κ²μλꡬκ°</th>
<th>ν€μλ μ¬μ©μμμ</th>
<th>ν€μλ μ¬μ©νμ</th>
<th>μν λ±λ‘ μΉ΄ν κ³ λ¦¬</th>
</tr>
</thead>
<tbody>
<tr>
<td colspan="9" style="padding: 30px; text-align: center;">
κ²μμ μ€ννλ©΄ μ¬κΈ°μ κ²°κ³Όκ° νμλ©λλ€
</td>
</tr>
</tbody>
</table>
""")
            with gr.Column(visible=False) as keyword_analysis_section:
                with gr.Row():
                    with gr.Column(scale=1):
                        category_filter = gr.Dropdown(
                            choices=["μ 체 보기"],
                            label="μΉ΄ν κ³ λ¦¬ νν°",
                            value="μ 체 보기",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        total_volume_sort = gr.Dropdown(
                            choices=["μ λ ¬ μμ", "μ€λ¦μ°¨μ", "λ΄λ¦Όμ°¨μ"],
                            label="μ΄κ²μλ μ λ ¬",
                            value="μ λ ¬ μμ",
                            interactive=True
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        search_volume_filter = gr.Dropdown(
                            choices=["μ 체"],
                            label="κ²μλ κ΅¬κ° νν°",
                            value="μ 체",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        usage_count_sort = gr.Dropdown(
                            choices=["μ λ ¬ μμ", "μ€λ¦μ°¨μ", "λ΄λ¦Όμ°¨μ"],
                            label="ν€μλ μ¬μ©νμ μ λ ¬",
                            value="μ λ ¬ μμ",
                            interactive=True
                        )
            gr.HTML("<div class='data-container' id='table_container'></div>")
            table_output = gr.HTML(elem_classes="fade-in")

        # Category analysis section
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as category_analysis_section:
            gr.HTML('<div class="section-title"><i class="fas fa-chart-bar"></i> ν€μλ λΆμ</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    analysis_keywords = gr.Textbox(
                        label="ν€μλ μ λ ₯ (μ΅λ 20κ°, μΌν λλ μν°λ‘ ꡬλΆ)",
                        placeholder="μ: μ€μ§μ΄λ³Άμ, μ€μ§μ΄ μμ§, μ€μ§μ΄ μ리...",
                        lines=5
                    )
                with gr.Column(scale=1):
                    selected_category = gr.Dropdown(
                        label="λΆμν μΉ΄ν κ³ λ¦¬(λΆμ μ λ°λμ μ νν΄μ£ΌμΈμ)",
                        choices=["μ 체 보기"],
                        value="μ 체 보기",
                        interactive=True
                    )

        # Execution section
        with gr.Column(elem_classes="execution-section", visible=False) as execution_section:
            gr.HTML('<div class="section-title"><i class="fas fa-play-circle"></i> μ€ν</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    analyze_btn = gr.Button(
                        "μΉ΄ν κ³ λ¦¬ μΌμΉ λΆμ",
                        elem_classes=["execution-button", "primary-button"]
                    )
                with gr.Column(scale=1):
                    reset_btn = gr.Button(
                        "λͺ¨λ μ λ ₯ μ΄κΈ°ν",
                        elem_classes=["execution-button", "secondary-button"]
                    )

        # Analysis result section
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as analysis_output_section:
            gr.HTML('<div class="section-title"><i class="fas fa-list-ul"></i> λΆμ κ²°κ³Ό μμ½</div>')
            analysis_result = gr.HTML(elem_classes="fade-in")
            with gr.Row():
                download_output = gr.File(
                    label="ν€μλ λͺ©λ‘ λ€μ΄λ‘λ",
                    visible=True
                )

        # Holds the keywords DataFrame between events.
        state_df = gr.State()

        # Search: show the progress panel first, then run the search.
        search_inputs = [keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id]
        search_btn.click(
            fn=search_with_loading,
            inputs=search_inputs,
            outputs=[progress_section, empty_table_html]
        ).then(
            fn=process_search_results,
            inputs=search_inputs,
            outputs=[
                table_output, category_filter, search_volume_filter,
                state_df, selected_category, download_output,
                keyword_analysis_section, category_analysis_section,
                progress_section, empty_table_html, execution_section,
                keyword_state
            ]
        )

        # There is no UI control for keyword sorting; a single hidden Textbox
        # supplies the fixed "no sort" value. Previously an identical hidden
        # Textbox was created for each of the five bindings below.
        keyword_sort_placeholder = gr.Textbox(value="μ λ ¬ μμ", visible=False)
        table_refresh_inputs = [
            state_df, category_filter, keyword_sort_placeholder,
            total_volume_sort, usage_count_sort,
            search_volume_filter, exclude_zero_volume, session_id
        ]
        # Any filter or sort change re-renders the table from the same inputs.
        for table_trigger in (
            category_filter, total_volume_sort, usage_count_sort,
            search_volume_filter, exclude_zero_volume,
        ):
            table_trigger.change(
                fn=filter_and_sort_table,
                inputs=table_refresh_inputs,
                outputs=[table_output]
            )
        # The category filter also drives the analysis-category dropdown.
        category_filter.change(
            fn=update_category_selection,
            inputs=[category_filter, session_id],
            outputs=[selected_category]
        )

        # Analysis: show the progress panel first, then run the analysis.
        analyze_inputs = [analysis_keywords, selected_category, state_df, session_id]
        analyze_btn.click(
            fn=analyze_with_loading,
            inputs=analyze_inputs,
            outputs=[progress_section]
        ).then(
            fn=process_analyze_results,
            inputs=analyze_inputs,
            outputs=[analysis_result, download_output, analysis_output_section, progress_section]
        )

        # Reset.
        # NOTE(review): category_filter, search_volume_filter and
        # selected_category each appear twice in this output list, matching
        # the list/value pairs returned by reset_interface. Confirm how the
        # installed Gradio version handles duplicate output components; the
        # list values may not reset the dropdown choices.
        reset_btn.click(
            fn=reset_interface,
            inputs=[session_id],
            outputs=[
                keyword, korean_only, exclude_zero_volume, apply_main_keyword,
                table_output, category_filter, category_filter,
                search_volume_filter, search_volume_filter,
                total_volume_sort, usage_count_sort,
                state_df, selected_category, selected_category,
                analysis_keywords, analysis_result, download_output,
                keyword_analysis_section, analysis_output_section,
                keyword_state
            ]
        )
    return demo
if __name__ == "__main__":
    # ---------- Startup initialization ----------
    logger.info("π λ©μΈν€μλ λΆμ μ ν리μΌμ΄μ μμ...")
    # 1. Clean temp folders and set up the application temp directory.
    app_temp_dir = cleanup_on_startup()
    # 2. Start the periodic session cleanup thread.
    start_session_cleanup_scheduler()
    # 3. Initialize API configuration (failure is logged, startup continues).
    try:
        api_utils.initialize_api_configs()
    except Exception as e:
        logger.warning(f"API μ€μ μ΄κΈ°ν μ€ μ€λ₯ (κ³μ μ§ν): {e}")
    # 4. Initialize the Gemini model (failure is logged, startup continues).
    try:
        gemini_model = text_utils.get_gemini_model()
    except Exception as e:
        logger.warning(f"Gemini λͺ¨λΈ μ΄κΈ°ν μ€ μ€λ₯ (κ³μ μ§ν): {e}")
    logger.info("===== λ©ν°μ μ λ©μΈν€μλ λΆμ Application Startup at %s =====", time.strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(f"π μμ νμΌ μ μ₯ μμΉ: {app_temp_dir}")
    # ---------- Run the app ----------
    try:
        app = create_app()
        app.launch(
            share=False,            # sharing disabled for security
            server_name="0.0.0.0",  # accept connections on all interfaces
            server_port=7860,       # listening port
            max_threads=40,         # more worker threads for multiple users
            auth=None,              # authentication can be added if needed
            show_error=True,        # show errors
            quiet=False,            # show logs
            favicon_path=None,      # favicon setting
            ssl_verify=False        # SSL verification disabled (development)
        )
    except Exception as e:
        logger.error(f"μ ν리μΌμ΄μ μ€ν μ€ν¨: {e}")
        raise
    finally:
        # Final cleanup on shutdown.
        logger.info("π§Ή μ ν리μΌμ΄μ μ’ λ£ - μ΅μ’ μ 리 μμ ...")
        try:
            cleanup_huggingface_temp_folders()
            # setup_clean_temp_environment() returns the system temp
            # directory itself when it fails. Never delete that directory;
            # only remove a dedicated application directory.
            is_system_temp = (
                os.path.abspath(app_temp_dir) == os.path.abspath(tempfile.gettempdir())
            )
            if not is_system_temp and os.path.exists(app_temp_dir):
                shutil.rmtree(app_temp_dir, ignore_errors=True)
            logger.info("β μ΅μ’ μ 리 μλ£")
        except Exception as e:
            logger.error(f"μ΅μ’ μ 리 μ€ μ€λ₯: {e}")