Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -126,7 +126,7 @@ def analyze_text(text: str):
|
|
| 126 |
|
| 127 |
return df, temp_file.name
|
| 128 |
|
| 129 |
-
# [μ°Έμ‘°μ½λ-2] λ€μ΄λ² κ΄κ³ API
|
| 130 |
def generate_signature(timestamp, method, uri, secret_key):
|
| 131 |
message = f"{timestamp}.{method}.{uri}"
|
| 132 |
digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
|
|
@@ -143,7 +143,6 @@ def get_header(method, uri, api_key, secret_key, customer_id):
|
|
| 143 |
"X-Signature": signature
|
| 144 |
}
|
| 145 |
|
| 146 |
-
# κΈ°μ‘΄ λ¨μΌ ν€μλμ© ν¨μ (μ°Έκ³ μ©)
|
| 147 |
def fetch_related_keywords(keyword):
|
| 148 |
debug_log(f"fetch_related_keywords νΈμΆ, ν€μλ: {keyword}")
|
| 149 |
API_KEY = os.environ["NAVER_API_KEY"]
|
|
@@ -180,83 +179,6 @@ def fetch_related_keywords(keyword):
|
|
| 180 |
debug_log("fetch_related_keywords μλ£")
|
| 181 |
return result_df
|
| 182 |
|
| 183 |
-
# μ κ· μΆκ°: ν€μλ 10κ°μ© κ·Έλ£ΉμΌλ‘ λ¬Άμ΄ ν λ²μ API νΈμΆμ νλ ν¨μ
|
| 184 |
-
# (λ¨, κ° κ·Έλ£Ήμ μμ°¨μ μΌλ‘ νΈμΆλ¨)
|
| 185 |
-
def fetch_related_keywords_batch(keywords: list):
    """Fetch search-volume stats for up to 10 hint keywords in one Naver Ads API call.

    Args:
        keywords: list of hint keywords (the API accepts at most 10 per call;
            the caller is expected to pre-chunk — TODO confirm against caller).

    Returns:
        DataFrame with columns ["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"],
        capped at 100 rows; empty DataFrame when the API response has no
        "keywordList" key.
    """
    debug_log(f"fetch_related_keywords_batch νΈμΆ, ν€μλ κ·Έλ£Ή: {keywords}")
    # Credentials come from the environment; KeyError here means the Space
    # secrets are not configured.
    API_KEY = os.environ["NAVER_API_KEY"]
    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]

    BASE_URL = "https://api.naver.com"
    uri = "/keywordstool"
    method = "GET"
    headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
    params = {
        "hintKeywords": keywords,  # passed through as a list (max 10)
        "showDetail": "1"
    }
    response = requests.get(BASE_URL + uri, params=params, headers=headers)
    data = response.json()
    if "keywordList" not in data:
        return pd.DataFrame()
    # head(100) is already a no-op on shorter frames; no length guard needed.
    df = pd.DataFrame(data["keywordList"]).head(100)

    def parse_count(x):
        """Parse a count that may arrive as '1,234' or as the string '< 10'."""
        try:
            return int(str(x).replace(",", ""))
        except (ValueError, TypeError):
            # FIX: was a bare `except:` that also swallowed KeyboardInterrupt etc.
            return 0

    df["PCμκ²μλ"] = df["monthlyPcQcCnt"].apply(parse_count)
    df["λͺ¨λ°μΌμκ²μλ"] = df["monthlyMobileQcCnt"].apply(parse_count)
    df["ν νμκ²μλ"] = df["PCμκ²μλ"] + df["λͺ¨λ°μΌμκ²μλ"]
    df.rename(columns={"relKeyword": "μ 보ν€μλ"}, inplace=True)
    result_df = df[["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"]]
    debug_log("fetch_related_keywords_batch μλ£")
    return result_df
|
| 220 |
-
|
| 221 |
-
# process_keyword ν¨μλ₯Ό κ·Έλ£Ήλ³λ‘(κ° κ·Έλ£Ήμ μμ°¨μ μΌλ‘) μ²λ¦¬νλλ‘ κ°μ
|
| 222 |
-
def process_keyword(keywords: str, include_related: bool):
    """Resolve search volume and blog-post counts for newline-separated keywords.

    Keywords are chunked into batches of 10 and each batch is sent to the Ads
    API sequentially (never concurrently). Related keywords are appended only
    for the very first batch, excluding its first keyword.

    Returns:
        (result DataFrame sorted by total search volume desc, path to an Excel export)
    """
    debug_log(f"process_keyword νΈμΆ, ν€μλλ€: {keywords}, μ°κ΄κ²μμ΄ ν¬ν¨: {include_related}")
    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
    groups = [input_keywords[i:i + 10] for i in range(0, len(input_keywords), 10)]
    collected = []

    # Process each batch one after another (no parallel API calls).
    for idx, group in enumerate(groups):
        debug_log(f"κ·Έλ£Ή {idx+1} μ²λ¦¬ μμ: {group}")
        df_batch = fetch_related_keywords_batch(group)
        if df_batch.empty:
            continue
        # Pull out one row per requested keyword; fall back to the batch's
        # first row when the API did not echo the keyword back.
        for kw in group:
            match = df_batch[df_batch["μ 보ν€μλ"] == kw]
            collected.append(match if not match.empty else df_batch.head(1))
        # Related keywords: first batch only, first keyword excluded.
        if include_related and idx == 0:
            related = df_batch[df_batch["μ 보ν€μλ"] != group[0]]
            if not related.empty:
                collected.append(related)
        debug_log(f"κ·Έλ£Ή {idx+1} μ²λ¦¬ μλ£")

    if not collected:
        result_df = pd.DataFrame(columns=["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"])
    else:
        result_df = pd.concat(collected, ignore_index=True)
        result_df.drop_duplicates(subset=["μ 보ν€μλ"], inplace=True)

    result_df["λΈλ‘κ·Έλ¬Έμμ"] = result_df["μ 보ν€μλ"].apply(fetch_blog_count)
    result_df.sort_values(by="ν νμκ²μλ", ascending=False, inplace=True)
    debug_log("process_keyword μλ£")
    return result_df, create_excel_file(result_df)
|
| 259 |
-
|
| 260 |
def fetch_blog_count(keyword):
|
| 261 |
debug_log(f"fetch_blog_count νΈμΆ, ν€μλ: {keyword}")
|
| 262 |
client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
|
|
@@ -283,6 +205,36 @@ def create_excel_file(df):
|
|
| 283 |
debug_log(f"Excel νμΌ μμ±λ¨: {excel_path}")
|
| 284 |
return excel_path
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
# [μ°Έμ‘°μ½λ-1] λ° [μ°Έμ‘°μ½λ-2]λ₯Ό νμ©ν ννμ λΆμ λ° κ²μλ, λΈλ‘κ·Έλ¬Έμμ μΆκ° (λΉλμ1 μ κ±° μ΅μ
ν¬ν¨)
|
| 287 |
def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
| 288 |
debug_log("morphological_analysis_and_enrich ν¨μ μμ")
|
|
@@ -313,7 +265,32 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
|
| 313 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
| 314 |
return merged_df, merged_excel_path
|
| 315 |
|
| 316 |
-
# μλ‘κ² μΆκ°λ κΈ°λ₯: μ
λ ₯ν
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
def fetch_blog_content(url: str):
|
| 318 |
debug_log("fetch_blog_content ν¨μ μμ")
|
| 319 |
content = scrape_naver_blog(url)
|
|
@@ -325,23 +302,29 @@ with gr.Blocks(title="λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€", css=".
|
|
| 325 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
| 326 |
with gr.Row():
|
| 327 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
| 328 |
-
with gr.Row():
|
| 329 |
scrape_button = gr.Button("μ€ν¬λν μ€ν")
|
| 330 |
with gr.Row():
|
| 331 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
| 332 |
with gr.Row():
|
| 333 |
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=False)
|
|
|
|
|
|
|
| 334 |
with gr.Row():
|
| 335 |
analyze_button = gr.Button("λΆμ μ€ν")
|
|
|
|
| 336 |
with gr.Row():
|
| 337 |
-
|
|
|
|
|
|
|
| 338 |
with gr.Row():
|
| 339 |
-
|
|
|
|
| 340 |
|
| 341 |
-
# μ€ν¬λν μ€ν
|
| 342 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
| 343 |
-
# λΆμ μ€ν
|
| 344 |
-
analyze_button.click(fn=
|
|
|
|
| 345 |
|
| 346 |
if __name__ == "__main__":
|
| 347 |
debug_log("Gradio μ± μ€ν μμ")
|
|
|
|
| 126 |
|
| 127 |
return df, temp_file.name
|
| 128 |
|
| 129 |
+
# [μ°Έμ‘°μ½λ-2] λ€μ΄λ² κ΄κ³ API λ° κ²μλ/λΈλ‘κ·Έλ¬Έμμ μ‘°ν κΈ°λ₯
|
| 130 |
def generate_signature(timestamp, method, uri, secret_key):
|
| 131 |
message = f"{timestamp}.{method}.{uri}"
|
| 132 |
digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
|
|
|
|
| 143 |
"X-Signature": signature
|
| 144 |
}
|
| 145 |
|
|
|
|
| 146 |
def fetch_related_keywords(keyword):
|
| 147 |
debug_log(f"fetch_related_keywords νΈμΆ, ν€μλ: {keyword}")
|
| 148 |
API_KEY = os.environ["NAVER_API_KEY"]
|
|
|
|
| 179 |
debug_log("fetch_related_keywords μλ£")
|
| 180 |
return result_df
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
def fetch_blog_count(keyword):
|
| 183 |
debug_log(f"fetch_blog_count νΈμΆ, ν€μλ: {keyword}")
|
| 184 |
client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
|
|
|
|
| 205 |
debug_log(f"Excel νμΌ μμ±λ¨: {excel_path}")
|
| 206 |
return excel_path
|
| 207 |
|
| 208 |
+
def process_keyword(keywords: str, include_related: bool):
    """Look up search volume and blog-post counts for newline-separated keywords.

    Each keyword is queried one at a time via fetch_related_keywords. Related
    keywords are merged in only for the first input keyword.

    Returns:
        (result DataFrame sorted by total search volume desc, path to an Excel export)
    """
    debug_log(f"process_keyword νΈμΆ, ν€μλλ€: {keywords}, μ°κ΄κ²μμ΄ ν¬ν¨: {include_related}")
    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
    frames = []

    for idx, kw in enumerate(input_keywords):
        df_kw = fetch_related_keywords(kw)
        if df_kw.empty:
            continue
        # Keep the exact-match row; if the API did not echo the keyword,
        # fall back to the response's first row.
        exact = df_kw[df_kw["μ 보ν€μλ"] == kw]
        frames.append(exact if not exact.empty else df_kw.head(1))
        # Related keywords are attached for the first keyword only.
        if include_related and idx == 0:
            related = df_kw[df_kw["μ 보ν€μλ"] != kw]
            if not related.empty:
                frames.append(related)

    if not frames:
        result_df = pd.DataFrame(columns=["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"])
    else:
        result_df = pd.concat(frames, ignore_index=True)
        result_df.drop_duplicates(subset=["μ 보ν€μλ"], inplace=True)

    result_df["λΈλ‘κ·Έλ¬Έμμ"] = result_df["μ 보ν€μλ"].apply(fetch_blog_count)
    result_df.sort_values(by="ν νμκ²μλ", ascending=False, inplace=True)
    debug_log("process_keyword μλ£")
    return result_df, create_excel_file(result_df)
|
| 237 |
+
|
| 238 |
# [μ°Έμ‘°μ½λ-1] λ° [μ°Έμ‘°μ½λ-2]λ₯Ό νμ©ν ννμ λΆμ λ° κ²μλ, λΈλ‘κ·Έλ¬Έμμ μΆκ° (λΉλμ1 μ κ±° μ΅μ
ν¬ν¨)
|
| 239 |
def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
| 240 |
debug_log("morphological_analysis_and_enrich ν¨μ μμ")
|
|
|
|
| 265 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
| 266 |
return merged_df, merged_excel_path
|
| 267 |
|
| 268 |
+
# μλ‘κ² μΆκ°λ κΈ°λ₯ 1,2,3: μ§μ μ
λ ₯ν ν€μλ(μν° λλ ','λ‘ κ΅¬λΆλ λ€μμ ν€μλ)κ° λΈλ‘κ·Έ λ³Έλ¬Έ λ΄ λ±μ₯ λΉλμλ₯Ό 체ν¬
|
| 269 |
+
def direct_keyword_analysis(text: str, keyword_input: str):
    """Count occurrences of user-supplied keywords inside the blog body.

    Args:
        text: blog body to search.
        keyword_input: keywords separated by newlines and/or commas.

    Returns:
        (DataFrame with columns ["ν€μλ", "λΉλμ"], path to an Excel export)
    """
    debug_log("direct_keyword_analysis ν¨μ μμ")
    # Split on newlines or commas, dropping blanks and surrounding whitespace.
    tokens = (t.strip() for t in re.split(r'[\n,]+', keyword_input))
    keywords = [t for t in tokens if t]
    debug_log(f"μλ ₯λ ν€μλ λͺ©λ‘: {keywords}")
    rows = []
    for kw in keywords:
        # str.count is non-overlapping substring counting, same as before.
        freq = text.count(kw)
        rows.append((kw, freq))
        debug_log(f"ν€μλ '{kw}'μ λΉλμ: {freq}")
    df = pd.DataFrame(rows, columns=["ν€μλ", "λΉλμ"])
    excel_path = create_excel_file(df)
    debug_log("direct_keyword_analysis ν¨μ μλ£")
    return df, excel_path
|
| 284 |
+
|
| 285 |
+
# λΆμ μ€ν λ²νΌ ν΄λ¦ μ, μμ κ°λ₯ν λΈλ‘κ·Έ λ³Έλ¬Έμ λμμΌλ‘ ννμ λΆμκ³Ό μ§μ ν€μλ λΆμμ ν¨κ» μ§ν
|
| 286 |
+
def analyze_combined(blog_text: str, remove_freq1: bool, keyword_input: str):
    """Run morphological analysis and direct keyword counting on the same text.

    Returns:
        (morph DataFrame, morph Excel path, direct DataFrame, direct Excel path)
    """
    debug_log("analyze_combined ν¨μ μμ")
    morph = morphological_analysis_and_enrich(blog_text, remove_freq1)
    direct = direct_keyword_analysis(blog_text, keyword_input)
    debug_log("analyze_combined ν¨μ μλ£")
    return morph[0], morph[1], direct[0], direct[1]
|
| 292 |
+
|
| 293 |
+
# μ€ν¬λν μ€ν: λΈλ‘κ·Έ λ§ν¬λ₯Ό ν΅ν΄ λ΄μ©μ κ°μ Έμ μμ κ°λ₯ν ν
μ€νΈ λ°μ€μ μΆλ ₯
|
| 294 |
def fetch_blog_content(url: str):
|
| 295 |
debug_log("fetch_blog_content ν¨μ μμ")
|
| 296 |
content = scrape_naver_blog(url)
|
|
|
|
| 302 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
| 303 |
with gr.Row():
|
| 304 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
|
|
|
| 305 |
scrape_button = gr.Button("μ€ν¬λν μ€ν")
|
| 306 |
with gr.Row():
|
| 307 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
| 308 |
with gr.Row():
|
| 309 |
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=False)
|
| 310 |
+
with gr.Row():
|
| 311 |
+
keyword_input_box = gr.Textbox(label="μ§μ ν€μλ μ
λ ₯ (μν° λλ ','λ‘ κ΅¬λΆ)", lines=2, placeholder="μ: ν€μλ1, ν€μλ2\nν€μλ3")
|
| 312 |
with gr.Row():
|
| 313 |
analyze_button = gr.Button("λΆμ μ€ν")
|
| 314 |
+
with gr.Markdown("### ννμ λΆμ κ²°κ³Ό")
|
| 315 |
with gr.Row():
|
| 316 |
+
morph_result_df = gr.Dataframe(label="ννμ λΆμ κ²°κ³Ό (λ¨μ΄, λΉλμ, κ²μλ, λΈλ‘κ·Έλ¬Έμμ λ±)")
|
| 317 |
+
morph_excel_file = gr.File(label="ννμ λΆμ Excel λ€μ΄λ‘λ")
|
| 318 |
+
with gr.Markdown("### μ§μ ν€μλ λΆμ κ²°κ³Ό")
|
| 319 |
with gr.Row():
|
| 320 |
+
direct_result_df = gr.Dataframe(label="μ§μ ν€μλ λΆμ κ²°κ³Ό (ν€μλ, λΉλμ)")
|
| 321 |
+
direct_excel_file = gr.File(label="μ§μ ν€μλ λΆμ Excel λ€μ΄λ‘λ")
|
| 322 |
|
| 323 |
+
# μ€ν¬λν μ€ν: URLμ μ
λ ₯νλ©΄ λΈλ‘κ·Έ λ΄μ©μ μμ κ°λ₯ν ν
μ€νΈ λ°μ€μ μ±μμ€
|
| 324 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
| 325 |
+
# λΆμ μ€ν: μμ λ λΈλ‘κ·Έ λ΄μ©κ³Ό λΉλμ1 μ κ±° μ΅μ
, μ§μ μ
λ ₯ ν€μλλ₯Ό λμμΌλ‘ λ λΆμμ ν¨κ» μ§ν
|
| 326 |
+
analyze_button.click(fn=analyze_combined, inputs=[blog_content_box, remove_freq_checkbox, keyword_input_box],
|
| 327 |
+
outputs=[morph_result_df, morph_excel_file, direct_result_df, direct_excel_file])
|
| 328 |
|
| 329 |
if __name__ == "__main__":
|
| 330 |
debug_log("Gradio μ± μ€ν μμ")
|