|
|
""" |
|
|
ν€μλ μ²λ¦¬ κ΄λ ¨ κΈ°λ₯ - μλ€ μ‘°ν© μ€ λμ κ²μλλ§ μ ν, μΉ΄ν
κ³ λ¦¬ νλͺ© μ κ±° |
|
|
- ν€μλ μΆμΆ λ° μ‘°ν© |
|
|
- κ²μ κ²°κ³Ό μ²λ¦¬ |
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
import re |
|
|
from collections import defaultdict, Counter |
|
|
import text_utils |
|
|
import keyword_search |
|
|
import product_search |
|
|
import logging |
|
|
|
|
|
|
|
|
# Module-level logger: INFO and above, formatted with timestamp, logger name
# and level, written through a StreamHandler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# getLogger() returns the same object every time this module body runs (e.g.
# on importlib.reload), so attach the handler only once; otherwise every
# reload would add another handler and each message would be printed again.
if not logger.handlers:
    logger.addHandler(handler)
|
|
|
|
|
# Keys of the per-keyword volume dicts read from the result of
# keyword_search.fetch_all_search_volumes; the same strings are reused as
# output column names.
# NOTE(review): these literals are copied verbatim from the reviewed source,
# which looks mis-decoded - confirm against the file's real encoding.
_PC_VOLUME_KEY = "PCκ²μλ"
_MOBILE_VOLUME_KEY = "λͺ¨λ°μΌκ²μλ"
_TOTAL_VOLUME_KEY = "μ΄κ²μλ"


def _to_ui_keyword(api_keyword, current_keyword):
    """Return the display form of a space-less API keyword.

    When ``api_keyword`` contains ``current_keyword`` and has no spaces, a
    space is inserted on each side of ``current_keyword`` where other text is
    attached. Any other keyword is returned unchanged.
    """
    if not current_keyword or current_keyword not in api_keyword:
        return api_keyword
    if api_keyword == current_keyword or " " in api_keyword:
        return api_keyword

    if api_keyword.startswith(current_keyword):
        # The keyword differs from current_keyword, so a suffix always exists.
        return f"{current_keyword} {api_keyword[len(current_keyword):]}"
    if api_keyword.endswith(current_keyword):
        return f"{api_keyword[:-len(current_keyword)]} {current_keyword}"

    # current_keyword sits strictly inside: split at its first occurrence.
    idx = api_keyword.find(current_keyword)
    prefix = api_keyword[:idx]
    suffix = api_keyword[idx + len(current_keyword):]
    return " ".join(part for part in (prefix, current_keyword, suffix) if part)


def _volume_counts(search_volumes, api_keyword):
    """Return ``(pc, mobile, total)`` volumes for a keyword, 0 where missing."""
    volume = search_volumes.get(api_keyword, {})
    return (
        volume.get(_PC_VOLUME_KEY, 0),
        volume.get(_MOBILE_VOLUME_KEY, 0),
        volume.get(_TOTAL_VOLUME_KEY, 0),
    )


def _keep_stronger_of_each_pair(combo_candidates, api_to_ui_keywords, keyword_pairs,
                                search_volumes, current_keyword, exclude_zero_volume):
    """Reduce the candidates to one winner per front/back keyword pair.

    Kept are: the main keyword itself, candidates whose display form contains
    the main keyword and that belong to no pair, and for every pair the
    variant with the higher total volume (the back variant wins a non-zero
    tie). Pairs whose volumes are both zero are dropped.
    """
    filtered_candidates = {}

    main_api = current_keyword.replace(" ", "")
    if main_api in combo_candidates:
        filtered_candidates[main_api] = combo_candidates[main_api]
        logger.info(f"λ©μΈ ν€μλ μ μ§: '{current_keyword}'")

    # Built once instead of once per candidate.
    pair_api_keywords = set()
    for pair_info in keyword_pairs.values():
        pair_api_keywords.add(pair_info["front"].replace(" ", ""))
        pair_api_keywords.add(pair_info["back"].replace(" ", ""))

    for api_kw, categories in combo_candidates.items():
        ui_kw = api_to_ui_keywords[api_kw]
        if (current_keyword in ui_kw
                and api_kw != main_api
                and api_kw not in pair_api_keywords):
            filtered_candidates[api_kw] = categories
            logger.info(f"λ©μΈ ν€μλ ν¬ν¨ 볡ν©μ΄ μ μ§: '{ui_kw}'")

    for base_word, pair_info in keyword_pairs.items():
        front_kw = pair_info["front"]
        back_kw = pair_info["back"]
        front_api = front_kw.replace(" ", "")
        back_api = back_kw.replace(" ", "")
        front_vol = search_volumes.get(front_api, {}).get(_TOTAL_VOLUME_KEY, 0)
        back_vol = search_volumes.get(back_api, {}).get(_TOTAL_VOLUME_KEY, 0)

        if front_vol > back_vol:
            selected_api, selected_kw, selected_vol = front_api, front_kw, front_vol
            removed_kw, removed_vol = back_kw, back_vol
        elif back_vol > front_vol or back_vol > 0:
            # Back variant is larger, or both are equal and non-zero.
            selected_api, selected_kw, selected_vol = back_api, back_kw, back_vol
            removed_kw, removed_vol = front_kw, front_vol
        else:
            logger.info(f" '{base_word}' μ‘°ν©: λ λ€ κ²μλ 0μΌλ‘ μ μΈ")
            continue

        if exclude_zero_volume and not selected_vol > 0:
            logger.info(f" '{base_word}' μ‘°ν©: κ²μλ 0μΌλ‘ μ μΈ")
            continue

        if selected_api not in combo_candidates:
            # Previously an unguarded lookup that raised KeyError.
            logger.warning(
                "Pair keyword '%s' is not among the combo candidates; skipped",
                selected_kw,
            )
            continue

        filtered_candidates[selected_api] = combo_candidates[selected_api]
        logger.info(f" '{base_word}' μ‘°ν© μ ν: '{selected_kw}' ({selected_vol:,}) > '{removed_kw}' ({removed_vol:,})")

    return filtered_candidates


def process_search_results(search_results, current_keyword="", exclude_zero_volume=True):
    """Turn raw search results into a product table and a keyword table.

    Steps: build a display form for every candidate keyword, fetch search
    volumes for all candidates, keep only the stronger variant of each
    front/back pair (when pairs and a current keyword are given), drop
    duplicates that consist of the same words in a different order (the one
    with the higher total volume wins), and assemble the keyword DataFrame
    sorted by total volume, descending.

    Args:
        search_results (dict): Must provide "product_list",
            "combo_candidates", "category_counter" and "keyword_indices";
            "keyword_pairs" is optional and maps a base word to a dict with
            "front" and "back" keywords.
        current_keyword (str): Keyword currently being searched.
        exclude_zero_volume (bool): Drop keywords whose total volume is 0.

    Returns:
        dict: "products_df", "keywords_df", "categories" and "message".
        Both DataFrames are None when there is nothing to process.
    """
    # NOTE(review): all runtime strings below are copied verbatim from the
    # reviewed source. Three literals were split across two lines there,
    # presumably by an embedded U+0085; they are written with an explicit
    # "\x85" escape here - confirm against the file's real encoding.
    logger.info("\n===== κ²μ κ²°κ³Ό μ²λ¦¬ μμ =====")
    logger.info(f"νμ¬ ν€μλ: '{current_keyword}'")
    logger.info(f"κ²μλ 0 ν€μλ μ μΈ: {exclude_zero_volume}")

    if not search_results or not search_results.get("product_list"):
        logger.warning("κ²μ κ²°κ³Όκ° μμ΅λλ€.")
        return {
            "products_df": None,
            "keywords_df": None,
            "categories": ["μ 체 보기"],
            "message": "κ²μ κ²°κ³Όκ° μμ΅λλ€."
        }

    product_list = search_results["product_list"]
    combo_candidates = search_results["combo_candidates"]
    category_counter = search_results["category_counter"]
    keyword_indices = search_results["keyword_indices"]
    keyword_pairs = search_results.get("keyword_pairs", {})

    logger.info(f"κ²μ κ²°κ³Ό - μν μ: {len(product_list)}κ°")
    logger.info(f"κ²μ κ²°κ³Ό - μ‘°ν© ν보 μ: {len(combo_candidates)}κ°")
    logger.info(f"κ²μ κ²°κ³Ό - μΉ΄ν\x85κ³ λ¦¬ μ: {len(category_counter)}κ°")

    df_products = pd.DataFrame(product_list)

    # Display form for every original candidate; later steps only ever look up
    # keys taken from this same candidate set.
    api_to_ui_keywords = {
        api_keyword: _to_ui_keyword(api_keyword, current_keyword)
        for api_keyword in combo_candidates
    }

    logger.info(f"\nκ²μλ μ‘°ν λμ ν€μλ μ: {len(combo_candidates)}κ°")
    search_volumes = keyword_search.fetch_all_search_volumes(list(combo_candidates.keys()))
    logger.info(f"κ²μλ μ‘°ν μλ£: {len(search_volumes)}κ° κ²°κ³Ό")

    if keyword_pairs and current_keyword:
        logger.info("\n=== μλ€ μ‘°ν© μ€ λμ κ²μλ μ ν ===")
        combo_candidates = _keep_stronger_of_each_pair(
            combo_candidates, api_to_ui_keywords, keyword_pairs,
            search_volumes, current_keyword, exclude_zero_volume,
        )
        logger.info(f"μλ€ μ‘°ν© νν°λ§ μλ£: {len(combo_candidates)}κ° ν€μλ μ ν")

    zero_volume_count = sum(
        1 for vol in search_volumes.values() if vol.get(_TOTAL_VOLUME_KEY, 0) == 0
    )
    logger.info(f"κ²μλ 0μΈ ν€μλ μ: {zero_volume_count}κ° ({zero_volume_count/max(1, len(search_volumes))*100:.1f}%)")

    # Order-insensitive de-duplication: the key is the sorted words joined
    # together, so the same words in a different order collapse into one
    # entry; the entry with the higher total volume is kept.
    normalized_keywords = {}
    for api_keyword in combo_candidates:
        ui_keyword = api_to_ui_keywords[api_keyword]
        pc_count, mobile_count, total_count = _volume_counts(search_volumes, api_keyword)

        if exclude_zero_volume and total_count == 0:
            logger.debug(f" - '{ui_keyword}' (API: '{api_keyword}') - κ²μλ 0μΌλ‘ μ μΈλ¨")
            continue

        normalized = "".join(sorted(ui_keyword.split()))
        entry = (api_keyword, ui_keyword, pc_count, mobile_count, total_count)

        if normalized not in normalized_keywords:
            normalized_keywords[normalized] = entry
            logger.debug(f" - ν€μλ μΆκ°: '{ui_keyword}' (κ²μλ: {total_count})")
            continue

        existing_ui_keyword = normalized_keywords[normalized][1]
        existing_total = normalized_keywords[normalized][4]
        if total_count > existing_total:
            logger.debug(f" - μ€λ³΅ ν€μλ λ체: '{existing_ui_keyword}' ({existing_total}) -> '{ui_keyword}' ({total_count})")
            normalized_keywords[normalized] = entry
        else:
            logger.debug(f" - μ€λ³΅ ν€μλ μ μΈ: '{ui_keyword}' ({total_count}) < '{existing_ui_keyword}' ({existing_total})")

    logger.info(f"\nμ€λ³΅ μ κ±° ν ν€μλ μ: {len(normalized_keywords)}κ°")

    final_combos = []
    for api_keyword, ui_keyword, pc_count, mobile_count, total_count in normalized_keywords.values():
        readable = fix_keyword_order(ui_keyword, current_keyword)
        search_volume_range = text_utils.get_search_volume_range(total_count)

        # Indices are looked up by the keyword with the main keyword removed
        # first, then by the API keyword; stored indices are 0-based and are
        # reported 1-based.
        base_word = readable.replace(current_keyword, "").strip() if current_keyword else readable
        if base_word in keyword_indices:
            positions = keyword_indices[base_word]
        elif api_keyword in keyword_indices:
            positions = keyword_indices[api_keyword]
        else:
            positions = []
        ranks = [idx + 1 for idx in positions]

        final_combos.append({
            "μ‘°ν© ν€μλ": readable.strip(),
            _PC_VOLUME_KEY: pc_count,
            _MOBILE_VOLUME_KEY: mobile_count,
            _TOTAL_VOLUME_KEY: total_count,
            "κ²μλꡬκ°": search_volume_range,
            "ν€μλ μ¬μ©μμμ": ", ".join(map(str, ranks)) if ranks else "-",
            "ν€μλ μ¬μ©νμ": len(ranks)
        })

    df_keywords = pd.DataFrame(final_combos)

    if not df_keywords.empty:
        df_keywords = df_keywords.sort_values(by=_TOTAL_VOLUME_KEY, ascending=False)
        df_keywords = df_keywords.reset_index(drop=True)

    logger.info(f"\nμμ±λ ν€μλ λ°μ΄ν°νλ μ ν μ: {len(df_keywords)}")
    if not df_keywords.empty:
        logger.debug(f"λ°μ΄ν°νλ μ μ΄: {df_keywords.columns.tolist()}")
        # NOTE(review): the original nesting of this line could not be
        # recovered from the flattened source - confirm it belongs here.
        logger.info(f"μ΄ {len(df_keywords)}κ° ν€μλ μμ± μλ£")

    # "<category> (<count>)" entries in sorted order, preceded by the
    # catch-all entry.
    category_with_counts = ["μ 체 보기"]
    category_with_counts.extend(
        f"{cat} ({category_counter[cat]})" for cat in sorted(category_counter.keys())
    )

    logger.info(f"μΉ΄ν\x85κ³ λ¦¬ μ: {len(category_counter)}κ°")
    logger.info("===== κ²μ κ²°κ³Ό μ²λ¦¬ μλ£ =====\n")

    return {
        "products_df": df_products,
        "keywords_df": df_keywords,
        "categories": category_with_counts,
        "message": "β\x85 κ²μμ΄ μλ£λμμ΅λλ€. μλμμ ν€μλλ₯Ό νμΈνμΈμ."
    }
|
|
|
|
|
def filter_and_sort_table(df, selected_cat, keyword_sort, total_volume_sort, usage_count_sort, selected_volume_range, exclude_zero_volume=False):
    """Filter and sort the keyword table, then render it as HTML.

    Args:
        df: Keyword DataFrame; None or an empty frame yields "".
        selected_cat: Not read by this function; kept so existing callers
            keep working.
        keyword_sort: Sort choice for the combined-keyword column.
        total_volume_sort: Sort choice for the total-volume column.
        usage_count_sort: Sort choice for the usage-count column.
        selected_volume_range: Volume-range label to keep; a falsy value or
            the "all" label disables the filter.
        exclude_zero_volume (bool): Drop rows whose total volume is 0.

    Returns:
        str: HTML produced by export_utils.create_table_without_checkboxes,
        or "" when there is no data.
    """
    # NOTE(review): runtime strings below are copied verbatim from the
    # reviewed source, which looks mis-decoded - confirm against the file's
    # real encoding.
    if df is None or df.empty:
        return ""

    filtered = df.copy()

    if selected_volume_range and selected_volume_range != "μ 체":
        filtered = filtered[filtered["κ²μλꡬκ°"] == selected_volume_range]

    if exclude_zero_volume:
        filtered = filtered[filtered["μ΄κ²μλ"] > 0]
        logger.info(f"κ²μλ 0 μ μΈ νν° μ μ© - λ¨μ ν€μλ μ: {len(filtered)}")

    # Sorts are applied one after another, so the last active one is the
    # primary order. A stable algorithm keeps the earlier orders as
    # tie-breakers; the default quicksort would discard them.
    sort_requests = (
        (keyword_sort, "μ‘°ν© ν€μλ"),
        (total_volume_sort, "μ΄κ²μλ"),
        (usage_count_sort, "ν€μλ μ¬μ©νμ"),
    )
    for choice, column in sort_requests:
        if choice != "μ λ ¬ μμ":
            is_ascending = choice == "μ€λ¦μ°¨μ"
            filtered = filtered.sort_values(by=column, ascending=is_ascending, kind="mergesort")

    logger.info(f"νν° μ μ© ν - νν°λ§λ DataFrame ν μ: {len(filtered)}")

    filtered = filtered.reset_index(drop=True)

    # Imported at call time, as in the original.
    from export_utils import create_table_without_checkboxes

    return create_table_without_checkboxes(filtered)
|
|
|
|
|
# Hangul syllables (U+AC00..U+D7A3), written as escapes so the patterns do not
# depend on how this source file is encoded or decoded.
_HANGUL_CLASS = r"\uac00-\ud7a3"
# Digits (optionally followed by latin letters) glued to a following Hangul run.
_DIGITS_THEN_HANGUL_RE = re.compile(rf"^([0-9]+[a-zA-Z]*)([{_HANGUL_CLASS}]+.*)$")
_HAS_HANGUL_RE = re.compile(rf"[{_HANGUL_CLASS}]")


def fix_keyword_order(keyword: str, main_keyword: str) -> str:
    """Reorder a keyword so Korean words come before latin/numeric words.

    Two shapes are handled:

    * digits (plus optional latin letters) directly followed by Hangul are
      split and swapped, giving "<hangul> <digits>";
    * in a space-separated keyword, all words containing Hangul are moved in
      front of the words that contain none, each group keeping its order.

    Anything else - including keywords that are all Korean or all
    non-Korean - is returned unchanged.

    Args:
        keyword (str): Keyword to fix.
        main_keyword (str): Main keyword; not used by the current logic and
            kept for call compatibility.

    Returns:
        str: The reordered keyword.
    """
    # Same logger object as the module-level ``logger``; resolved by name so
    # this helper has no dependency on module-level state.
    log = logging.getLogger(__name__)

    glued = _DIGITS_THEN_HANGUL_RE.match(keyword)
    if glued:
        number_part, korean_part = glued.groups()
        fixed_keyword = f"{korean_part} {number_part}"
        log.debug(f"λΆμ΄μλ ν¨ν΄ μμ : '{keyword}' -> '{fixed_keyword}'")
        return fixed_keyword

    if ' ' not in keyword:
        return keyword

    korean_parts = []
    non_korean_parts = []
    for part in keyword.split():
        bucket = korean_parts if _HAS_HANGUL_RE.search(part) else non_korean_parts
        bucket.append(part)

    # Nothing to reorder unless both kinds of words are present.
    if not korean_parts or not non_korean_parts:
        return keyword

    fixed_keyword = " ".join(korean_parts + non_korean_parts)
    if fixed_keyword != keyword:
        log.debug(f"ν€μλ μμ μμ : '{keyword}' -> '{fixed_keyword}'")
    return fixed_keyword