Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,10 +17,11 @@ import base64
|
|
| 17 |
def debug_log(message: str):
|
| 18 |
print(f"[DEBUG] {message}")
|
| 19 |
|
| 20 |
-
# -
|
| 21 |
def scrape_naver_blog(url: str) -> str:
|
| 22 |
debug_log("scrape_naver_blog ํจ์ ์์")
|
| 23 |
debug_log(f"์์ฒญ๋ฐ์ URL: {url}")
|
|
|
|
| 24 |
headers = {
|
| 25 |
"User-Agent": (
|
| 26 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
@@ -28,14 +29,20 @@ def scrape_naver_blog(url: str) -> str:
|
|
| 28 |
"Chrome/96.0.4664.110 Safari/537.36"
|
| 29 |
)
|
| 30 |
}
|
|
|
|
| 31 |
try:
|
|
|
|
| 32 |
response = requests.get(url, headers=headers)
|
| 33 |
debug_log("HTTP GET ์์ฒญ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
|
| 34 |
if response.status_code != 200:
|
| 35 |
debug_log(f"์์ฒญ ์คํจ, ์ํ์ฝ๋: {response.status_code}")
|
| 36 |
return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {response.status_code}"
|
|
|
|
|
|
|
| 37 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 38 |
debug_log("HTML ํ์ฑ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
|
|
|
|
|
|
|
| 39 |
iframe = soup.select_one("iframe#mainFrame")
|
| 40 |
if not iframe:
|
| 41 |
debug_log("iframe#mainFrame ํ๊ทธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
|
@@ -44,8 +51,12 @@ def scrape_naver_blog(url: str) -> str:
|
|
| 44 |
if not iframe_src:
|
| 45 |
debug_log("iframe src๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")
|
| 46 |
return "๋ณธ๋ฌธ iframe์ src๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
|
|
|
|
|
|
| 47 |
parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
|
| 48 |
debug_log(f"iframe ํ์ด์ง ์์ฒญ URL: {parsed_iframe_url}")
|
|
|
|
|
|
|
| 49 |
iframe_response = requests.get(parsed_iframe_url, headers=headers)
|
| 50 |
debug_log("HTTP GET ์์ฒญ(iframe ํ์ด์ง) ์๋ฃ")
|
| 51 |
if iframe_response.status_code != 200:
|
|
@@ -53,6 +64,8 @@ def scrape_naver_blog(url: str) -> str:
|
|
| 53 |
return f"iframe์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {iframe_response.status_code}"
|
| 54 |
iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
|
| 55 |
debug_log("HTML ํ์ฑ(iframe ํ์ด์ง) ์๋ฃ")
|
|
|
|
|
|
|
| 56 |
title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
|
| 57 |
title = title_div.get_text(strip=True) if title_div else "์ ๋ชฉ์ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 58 |
debug_log(f"์ถ์ถ๋ ์ ๋ชฉ: {title}")
|
|
@@ -62,42 +75,58 @@ def scrape_naver_blog(url: str) -> str:
|
|
| 62 |
else:
|
| 63 |
content = "๋ณธ๋ฌธ์ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 64 |
debug_log("๋ณธ๋ฌธ ์ถ์ถ ์๋ฃ")
|
|
|
|
| 65 |
result = f"[์ ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
|
| 66 |
-
debug_log("์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ ํฉ
|
| 67 |
return result
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
debug_log(f"์๋ฌ ๋ฐ์: {str(e)}")
|
| 70 |
return f"์คํฌ๋ํ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
| 71 |
|
| 72 |
-
# -
|
| 73 |
def analyze_text(text: str):
|
| 74 |
logging.basicConfig(level=logging.DEBUG)
|
| 75 |
logger = logging.getLogger(__name__)
|
| 76 |
logger.debug("์๋ณธ ํ
์คํธ: %s", text)
|
|
|
|
|
|
|
| 77 |
filtered_text = re.sub(r'[^๊ฐ-ํฃ]', '', text)
|
| 78 |
-
logger.debug("ํํฐ๋ง๋ ํ
์คํธ: %s", filtered_text)
|
|
|
|
| 79 |
if not filtered_text:
|
| 80 |
logger.debug("์ ํจํ ํ๊ตญ์ด ํ
์คํธ๊ฐ ์์.")
|
| 81 |
return pd.DataFrame(columns=["๋จ์ด", "๋น๋์"]), ""
|
|
|
|
|
|
|
| 82 |
mecab_instance = mecab.MeCab()
|
| 83 |
tokens = mecab_instance.pos(filtered_text)
|
| 84 |
logger.debug("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ: %s", tokens)
|
|
|
|
| 85 |
freq = {}
|
| 86 |
for word, pos in tokens:
|
| 87 |
-
if word and word.strip()
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
|
| 91 |
-
logger.debug("์ ๋ ฌ๋ ๋จ์ด ๋น๋: %s", sorted_freq)
|
|
|
|
|
|
|
| 92 |
df = pd.DataFrame(sorted_freq, columns=["๋จ์ด", "๋น๋์"])
|
| 93 |
-
logger.debug("
|
|
|
|
|
|
|
| 94 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
|
| 95 |
df.to_excel(temp_file.name, index=False, engine='openpyxl')
|
| 96 |
temp_file.close()
|
| 97 |
logger.debug("Excel ํ์ผ ์์ฑ๋จ: %s", temp_file.name)
|
|
|
|
| 98 |
return df, temp_file.name
|
| 99 |
|
| 100 |
-
# -
|
| 101 |
def generate_signature(timestamp, method, uri, secret_key):
|
| 102 |
message = f"{timestamp}.{method}.{uri}"
|
| 103 |
digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
|
|
@@ -119,6 +148,7 @@ def fetch_related_keywords(keyword):
|
|
| 119 |
API_KEY = os.environ["NAVER_API_KEY"]
|
| 120 |
SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
|
| 121 |
CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
|
|
|
|
| 122 |
BASE_URL = "https://api.naver.com"
|
| 123 |
uri = "/keywordstool"
|
| 124 |
method = "GET"
|
|
@@ -134,11 +164,13 @@ def fetch_related_keywords(keyword):
|
|
| 134 |
df = pd.DataFrame(data["keywordList"])
|
| 135 |
if len(df) > 100:
|
| 136 |
df = df.head(100)
|
|
|
|
| 137 |
def parse_count(x):
|
| 138 |
try:
|
| 139 |
return int(str(x).replace(",", ""))
|
| 140 |
except:
|
| 141 |
return 0
|
|
|
|
| 142 |
df["PC์๊ฒ์๋"] = df["monthlyPcQcCnt"].apply(parse_count)
|
| 143 |
df["๋ชจ๋ฐ์ผ์๊ฒ์๋"] = df["monthlyMobileQcCnt"].apply(parse_count)
|
| 144 |
df["ํ ํ์๊ฒ์๋"] = df["PC์๊ฒ์๋"] + df["๋ชจ๋ฐ์ผ์๊ฒ์๋"]
|
|
@@ -177,6 +209,7 @@ def process_keyword(keywords: str, include_related: bool):
|
|
| 177 |
debug_log(f"process_keyword ํธ์ถ, ํค์๋๋ค: {keywords}, ์ฐ๊ด๊ฒ์์ด ํฌํจ: {include_related}")
|
| 178 |
input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
|
| 179 |
result_dfs = []
|
|
|
|
| 180 |
for idx, kw in enumerate(input_keywords):
|
| 181 |
df_kw = fetch_related_keywords(kw)
|
| 182 |
if df_kw.empty:
|
|
@@ -190,241 +223,73 @@ def process_keyword(keywords: str, include_related: bool):
|
|
| 190 |
df_related = df_kw[df_kw["์ ๋ณดํค์๋"] != kw]
|
| 191 |
if not df_related.empty:
|
| 192 |
result_dfs.append(df_related)
|
|
|
|
| 193 |
if result_dfs:
|
| 194 |
result_df = pd.concat(result_dfs, ignore_index=True)
|
| 195 |
result_df.drop_duplicates(subset=["์ ๋ณดํค์๋"], inplace=True)
|
| 196 |
else:
|
| 197 |
result_df = pd.DataFrame(columns=["์ ๋ณดํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋"])
|
|
|
|
| 198 |
result_df["๋ธ๋ก๊ทธ๋ฌธ์์"] = result_df["์ ๋ณดํค์๋"].apply(fetch_blog_count)
|
| 199 |
result_df.sort_values(by="ํ ํ์๊ฒ์๋", ascending=False, inplace=True)
|
| 200 |
debug_log("process_keyword ์๋ฃ")
|
| 201 |
return result_df, create_excel_file(result_df)
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
debug_log("morphological_analysis_and_enrich ํจ์ ์์")
|
| 206 |
df_freq, _ = analyze_text(text)
|
| 207 |
if df_freq.empty:
|
| 208 |
debug_log("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ๊ฐ ๋น ๋ฐ์ดํฐํ๋ ์์
๋๋ค.")
|
| 209 |
return df_freq, ""
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
df_freq = df_freq[df_freq["๋น๋์"] != 1]
|
| 213 |
-
debug_log(f"๋น๋์ 1 ์ ๊ฑฐ ์ ์ฉ๋จ. {before_shape} -> {df_freq.shape}")
|
| 214 |
keywords = "\n".join(df_freq["๋จ์ด"].tolist())
|
| 215 |
debug_log(f"๋ถ์๋ ํค์๋: {keywords}")
|
|
|
|
|
|
|
| 216 |
df_keyword_info, _ = process_keyword(keywords, include_related=False)
|
| 217 |
debug_log("๊ฒ์๋ ๋ฐ ๋ธ๋ก๊ทธ๋ฌธ์์ ์กฐํ ์๋ฃ")
|
|
|
|
|
|
|
| 218 |
merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋จ์ด", right_on="์ ๋ณดํค์๋", how="left")
|
| 219 |
merged_df.drop(columns=["์ ๋ณดํค์๋"], inplace=True)
|
|
|
|
|
|
|
| 220 |
merged_excel_path = create_excel_file(merged_df)
|
| 221 |
debug_log("morphological_analysis_and_enrich ํจ์ ์๋ฃ")
|
| 222 |
return merged_df, merged_excel_path
|
| 223 |
|
| 224 |
-
#
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
debug_log(f"์
๋ ฅ๋ ํค์๋ ๋ชฉ๋ก: {keywords}")
|
| 230 |
-
results = []
|
| 231 |
-
for kw in keywords:
|
| 232 |
-
count = text.count(kw)
|
| 233 |
-
results.append((kw, count))
|
| 234 |
-
debug_log(f"ํค์๋ '{kw}'์ ๋น๋์: {count}")
|
| 235 |
-
df = pd.DataFrame(results, columns=["ํค์๋", "๋น๋์"])
|
| 236 |
-
excel_path = create_excel_file(df)
|
| 237 |
-
debug_log("direct_keyword_analysis ํจ์ ์๋ฃ")
|
| 238 |
-
return df, excel_path
|
| 239 |
-
|
| 240 |
-
# --- ํตํฉ ๋ถ์ (ํํ์ ๋ถ์ + ์ง์ ํค์๋ ๋ถ์) ---
|
| 241 |
-
def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
|
| 242 |
-
debug_log("combined_analysis ํจ์ ์์")
|
| 243 |
-
merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
|
| 244 |
-
if "์ง์ ์
๋ ฅ" not in merged_df.columns:
|
| 245 |
-
merged_df["์ง์ ์
๋ ฅ"] = ""
|
| 246 |
-
direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
|
| 247 |
-
direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
|
| 248 |
-
debug_log(f"์
๋ ฅ๋ ์ง์ ํค์๋: {direct_keywords}")
|
| 249 |
-
for dk in direct_keywords:
|
| 250 |
-
if dk in merged_df["๋จ์ด"].values:
|
| 251 |
-
merged_df.loc[merged_df["๋จ์ด"] == dk, "์ง์ ์
๋ ฅ"] = "์ง์ ์
๋ ฅ"
|
| 252 |
-
else:
|
| 253 |
-
freq = blog_text.count(dk)
|
| 254 |
-
df_direct, _ = process_keyword(dk, include_related=False)
|
| 255 |
-
if (not df_direct.empty) and (dk in df_direct["์ ๋ณดํค์๋"].values):
|
| 256 |
-
row = df_direct[df_direct["์ ๋ณดํค์๋"] == dk].iloc[0]
|
| 257 |
-
pc = row.get("PC์๊ฒ์๋", None)
|
| 258 |
-
mobile = row.get("๋ชจ๋ฐ์ผ์๊ฒ์๋", None)
|
| 259 |
-
total = row.get("ํ ํ์๊ฒ์๋", None)
|
| 260 |
-
blog_count = row.get("๋ธ๋ก๊ทธ๋ฌธ์์", None)
|
| 261 |
-
else:
|
| 262 |
-
pc = mobile = total = blog_count = None
|
| 263 |
-
new_row = {
|
| 264 |
-
"๋จ์ด": dk,
|
| 265 |
-
"๋น๋์": freq,
|
| 266 |
-
"PC์๊ฒ์๋": pc,
|
| 267 |
-
"๋ชจ๋ฐ์ผ์๊ฒ์๋": mobile,
|
| 268 |
-
"ํ ํ์๊ฒ์๋": total,
|
| 269 |
-
"๋ธ๋ก๊ทธ๋ฌธ์์": blog_count,
|
| 270 |
-
"์ง์ ์
๋ ฅ": "์ง์ ์
๋ ฅ"
|
| 271 |
-
}
|
| 272 |
-
merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
|
| 273 |
-
merged_df = merged_df.sort_values(by="๋น๋์", ascending=False).reset_index(drop=True)
|
| 274 |
-
combined_excel = create_excel_file(merged_df)
|
| 275 |
-
debug_log("combined_analysis ํจ์ ์๋ฃ")
|
| 276 |
-
return merged_df, combined_excel
|
| 277 |
-
|
| 278 |
-
# --- ๋ถ์ ํธ๋ค๋ฌ ---
|
| 279 |
-
def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
|
| 280 |
-
debug_log("analysis_handler ํจ์ ์์")
|
| 281 |
-
if direct_keyword_only:
|
| 282 |
-
# "์ง์ ํค์๋ ์
๋ ฅ๋ง ๋ถ์" ์ ํ ์ ๋จ๋
๋ถ์ ์ํ
|
| 283 |
-
return direct_keyword_analysis(blog_text, direct_keyword_input)
|
| 284 |
-
else:
|
| 285 |
-
# ๊ธฐ๋ณธ ํตํฉ ๋ถ์ ์ํ
|
| 286 |
-
return combined_analysis(blog_text, remove_freq1, direct_keyword_input)
|
| 287 |
-
|
| 288 |
-
# --- ์คํฌ๋ํ ์คํ ---
|
| 289 |
-
def fetch_blog_content(url: str):
|
| 290 |
-
debug_log("fetch_blog_content ํจ์ ์์")
|
| 291 |
-
content = scrape_naver_blog(url)
|
| 292 |
-
debug_log("fetch_blog_content ํจ์ ์๋ฃ")
|
| 293 |
-
return content
|
| 294 |
-
|
| 295 |
-
# --- Custom CSS ---
|
| 296 |
-
custom_css = """
|
| 297 |
-
/* ์ ์ฒด ์ปจํ
์ด๋ ์คํ์ผ */
|
| 298 |
-
.gradio-container {
|
| 299 |
-
max-width: 960px;
|
| 300 |
-
margin: auto;
|
| 301 |
-
font-family: 'Helvetica Neue', Arial, sans-serif;
|
| 302 |
-
background: #f5f7fa;
|
| 303 |
-
padding: 2rem;
|
| 304 |
-
}
|
| 305 |
-
/* ํค๋ ์คํ์ผ */
|
| 306 |
-
.custom-header {
|
| 307 |
-
text-align: center;
|
| 308 |
-
font-size: 2.5rem;
|
| 309 |
-
font-weight: bold;
|
| 310 |
-
margin-bottom: 1.5rem;
|
| 311 |
-
color: #333;
|
| 312 |
-
}
|
| 313 |
-
/* ๊ทธ๋ฃน ๋ฐ์ค ์คํ์ผ */
|
| 314 |
-
.custom-group {
|
| 315 |
-
background: #ffffff;
|
| 316 |
-
border-radius: 8px;
|
| 317 |
-
padding: 1.5rem;
|
| 318 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 319 |
-
margin-bottom: 1.5rem;
|
| 320 |
-
}
|
| 321 |
-
/* ๋ฒํผ ์คํ์ผ */
|
| 322 |
-
.custom-button {
|
| 323 |
-
background-color: #007bff;
|
| 324 |
-
color: #fff;
|
| 325 |
-
border: none;
|
| 326 |
-
border-radius: 4px;
|
| 327 |
-
padding: 0.6rem 1.2rem;
|
| 328 |
-
font-size: 1rem;
|
| 329 |
-
cursor: pointer;
|
| 330 |
-
transition: background-color 0.3s;
|
| 331 |
-
}
|
| 332 |
-
.custom-button:hover {
|
| 333 |
-
background-color: #0056b3;
|
| 334 |
-
}
|
| 335 |
-
/* ์ฒดํฌ๋ฐ์ค ์คํ์ผ */
|
| 336 |
-
.custom-checkbox {
|
| 337 |
-
margin-right: 1rem;
|
| 338 |
-
font-size: 1rem;
|
| 339 |
-
font-weight: bold;
|
| 340 |
-
}
|
| 341 |
-
/* ๊ฒฐ๊ณผ ํ
์ด๋ธ ๋ฐ ๋ค์ด๋ก๋ ๋ฒํผ */
|
| 342 |
-
.custom-result {
|
| 343 |
-
margin-top: 1.5rem;
|
| 344 |
-
}
|
| 345 |
-
/* ๊ฐ์ด๋ฐ ์ ๋ ฌ */
|
| 346 |
-
.centered {
|
| 347 |
-
display: flex;
|
| 348 |
-
justify-content: center;
|
| 349 |
-
align-items: center;
|
| 350 |
-
}
|
| 351 |
-
/* ์ฌ์ฉ์ค๋ช
์คํ์ผ */
|
| 352 |
-
.usage-instructions {
|
| 353 |
-
font-size: 1.1rem;
|
| 354 |
-
line-height: 1.6;
|
| 355 |
-
color: #555;
|
| 356 |
-
background: #fff;
|
| 357 |
-
padding: 1.5rem;
|
| 358 |
-
border-radius: 8px;
|
| 359 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 360 |
-
margin-top: 2rem;
|
| 361 |
-
}
|
| 362 |
-
.usage-instructions h2 {
|
| 363 |
-
font-size: 1.8rem;
|
| 364 |
-
font-weight: bold;
|
| 365 |
-
margin-bottom: 1rem;
|
| 366 |
-
color: #333;
|
| 367 |
-
}
|
| 368 |
-
.usage-instructions ul {
|
| 369 |
-
list-style: disc;
|
| 370 |
-
margin-left: 2rem;
|
| 371 |
-
}
|
| 372 |
-
"""
|
| 373 |
-
|
| 374 |
-
# --- Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ ---
|
| 375 |
-
with gr.Blocks(title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ํํ์ ๋ถ์ ์๋น์ค", css=custom_css) as demo:
|
| 376 |
-
gr.HTML("<div class='custom-header'>๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ํํ์ ๋ถ์ ์๋น์ค ๐</div>")
|
| 377 |
-
# ๋ธ๋ก๊ทธ ๋งํฌ์ ์คํฌ๋ํ ์คํ ๋ฒํผ์ ํ ๊ทธ๋ฃน ๋ด์ ๋ฐฐ์น (๋ฒํผ์ ๊ฐ์ด๋ฐ ์ ๋ ฌ)
|
| 378 |
-
with gr.Group(elem_classes="custom-group"):
|
| 379 |
with gr.Row():
|
| 380 |
blog_url_input = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ", placeholder="์: https://blog.naver.com/ssboost/222983068507", lines=1)
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
with gr.
|
|
|
|
|
|
|
| 386 |
with gr.Row():
|
| 387 |
-
|
| 388 |
with gr.Row():
|
| 389 |
-
|
| 390 |
with gr.Row():
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
with gr.Row(elem_classes="centered"):
|
| 394 |
-
analyze_button = gr.Button("๋ถ์ ์คํ", elem_classes="custom-button")
|
| 395 |
-
with gr.Group(elem_classes="custom-group custom-result"):
|
| 396 |
-
result_df = gr.Dataframe(label="ํตํฉ ๋ถ์ ๊ฒฐ๊ณผ (๋จ์ด, ๋น๋์, ๊ฒ์๋, ๋ธ๋ก๊ทธ๋ฌธ์์, ์ง์ ์
๋ ฅ)", interactive=True)
|
| 397 |
-
with gr.Group(elem_classes="custom-group"):
|
| 398 |
-
excel_file = gr.File(label="Excel ๋ค์ด๋ก๋")
|
| 399 |
-
# ์ฌ์ฉ์ค๋ช
HTML ๋ธ๋ก (์๋์ ๋ฐฐ์น)
|
| 400 |
-
with gr.Group(elem_classes="custom-group"):
|
| 401 |
-
usage_html = gr.HTML("""
|
| 402 |
-
<div class="usage-instructions">
|
| 403 |
-
<h2>์ฌ์ฉ ์ค๋ช
๐</h2>
|
| 404 |
-
<ul>
|
| 405 |
-
<li>๐ <strong>๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ</strong>: ๋ถ์ํ ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์ URL์ ์
๋ ฅํ์ธ์.</li>
|
| 406 |
-
<li>โ๏ธ <strong>์คํฌ๋ํ ์คํ</strong>: ๋งํฌ ์
๋ ฅ ํ ๋ฒํผ์ ํด๋ฆญํ๋ฉด ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ์ด ์๋์ผ๋ก ๋ถ๋ฌ์์ง๋๋ค.</li>
|
| 407 |
-
<li>๐ <strong>๋ธ๋ก๊ทธ ๋ด์ฉ (์์ ๊ฐ๋ฅ)</strong>: ๋ถ๋ฌ์จ ๋ธ๋ก๊ทธ ๋ด์ฉ์ด ํ์๋๋ฉฐ, ํ์์ ๋ฐ๋ผ ์ง์ ์์ ํ ์ ์์ต๋๋ค.</li>
|
| 408 |
-
<li>โ๏ธ <strong>์ต์
์ค์ </strong>:
|
| 409 |
-
<ul>
|
| 410 |
-
<li><em>๋น๋์1 ์ ๊ฑฐ</em>: ๊ธฐ๋ณธ ์ ํ๋์ด ์์ผ๋ฉฐ, ๋น๋์๊ฐ 1์ธ ๋จ์ด๋ ๊ฒฐ๊ณผ์์ ์ ์ธํฉ๋๋ค.</li>
|
| 411 |
-
<li><em>์ง์ ํค์๋ ์
๋ ฅ๋ง ๋ถ์</em>: ์ด ์ต์
์ ์ ํํ๋ฉด, ๋ธ๋ก๊ทธ ๋ณธ๋ฌธ์์ ์ง์ ์
๋ ฅํ ํค์๋๋ง ๋ถ์ํฉ๋๋ค.</li>
|
| 412 |
-
</ul>
|
| 413 |
-
</li>
|
| 414 |
-
<li>๐ค <strong>์ง์ ํค์๋ ์
๋ ฅ</strong>: ์ํฐ ๋๋ ์ผํ(,)๋ก ๊ตฌ๋ถํ์ฌ ๋ถ์ํ ํค์๋๋ฅผ ์
๋ ฅํ์ธ์.</li>
|
| 415 |
-
<li>๐ <strong>๋ถ์ ์คํ</strong>: ์ค์ ํ ์ต์
์ ๋ฐ๋ผ ํํ์ ๋ถ์ ๋ฐ ํค์๋ ๋ถ์์ด ์ํ๋์ด ๊ฒฐ๊ณผ๊ฐ ํ์ Excel ํ์ผ๋ก ์ถ๋ ฅ๋ฉ๋๋ค.</li>
|
| 416 |
-
<li>๐ฅ <strong>Excel ๋ค์ด๋ก๋</strong>: ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ Excel ํ์ผ๋ก ๋ค์ด๋ก๋ํ ์ ์์ต๋๋ค.</li>
|
| 417 |
-
</ul>
|
| 418 |
-
<p><strong>Tip:</strong> ๋ถ์ ๊ฒฐ๊ณผ๋ ์ค์๊ฐ์ผ๋ก ์
๋ฐ์ดํธ๋๋ฉฐ, ํ์์ ์์ ํ ๋ค์ ๋ถ์ํ ์ ์์ต๋๋ค. ์ฆ๊ฑฐ์ด ๋ถ์ ๋์ธ์! ๐</p>
|
| 419 |
-
</div>
|
| 420 |
-
""")
|
| 421 |
-
# ์ด๋ฒคํธ ์ฐ๊ฒฐ
|
| 422 |
-
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
| 423 |
-
analyze_button.click(fn=analysis_handler,
|
| 424 |
-
inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
|
| 425 |
-
outputs=[result_df, excel_file])
|
| 426 |
|
| 427 |
if __name__ == "__main__":
|
| 428 |
debug_log("Gradio ์ฑ ์คํ ์์")
|
| 429 |
demo.launch()
|
| 430 |
-
debug_log("Gradio ์ฑ ์คํ ์ข
๋ฃ")
|
|
|
|
| 17 |
def debug_log(message: str):
|
| 18 |
print(f"[DEBUG] {message}")
|
| 19 |
|
| 20 |
+
# [๊ธฐ๋ณธ์ฝ๋] - ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ ๊ธฐ๋ฅ
|
| 21 |
def scrape_naver_blog(url: str) -> str:
|
| 22 |
debug_log("scrape_naver_blog ํจ์ ์์")
|
| 23 |
debug_log(f"์์ฒญ๋ฐ์ URL: {url}")
|
| 24 |
+
|
| 25 |
headers = {
|
| 26 |
"User-Agent": (
|
| 27 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
|
|
| 29 |
"Chrome/96.0.4664.110 Safari/537.36"
|
| 30 |
)
|
| 31 |
}
|
| 32 |
+
|
| 33 |
try:
|
| 34 |
+
# 1) ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ '๋ฉ์ธ' ํ์ด์ง ์์ฒญ
|
| 35 |
response = requests.get(url, headers=headers)
|
| 36 |
debug_log("HTTP GET ์์ฒญ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
|
| 37 |
if response.status_code != 200:
|
| 38 |
debug_log(f"์์ฒญ ์คํจ, ์ํ์ฝ๋: {response.status_code}")
|
| 39 |
return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {response.status_code}"
|
| 40 |
+
|
| 41 |
+
# 2) ๋ฉ์ธ ํ์ด์ง ํ์ฑ
|
| 42 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 43 |
debug_log("HTML ํ์ฑ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
|
| 44 |
+
|
| 45 |
+
# 3) iframe ํ๊ทธ ์ฐพ๊ธฐ
|
| 46 |
iframe = soup.select_one("iframe#mainFrame")
|
| 47 |
if not iframe:
|
| 48 |
debug_log("iframe#mainFrame ํ๊ทธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
|
|
|
| 51 |
if not iframe_src:
|
| 52 |
debug_log("iframe src๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")
|
| 53 |
return "๋ณธ๋ฌธ iframe์ src๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 54 |
+
|
| 55 |
+
# 4) iframe src ๋ณด์ (์ ๋๊ฒฝ๋ก ์ฒ๋ฆฌ)
|
| 56 |
parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
|
| 57 |
debug_log(f"iframe ํ์ด์ง ์์ฒญ URL: {parsed_iframe_url}")
|
| 58 |
+
|
| 59 |
+
# 5) iframe ํ์ด์ง ์์ฒญ ๋ฐ ํ์ฑ
|
| 60 |
iframe_response = requests.get(parsed_iframe_url, headers=headers)
|
| 61 |
debug_log("HTTP GET ์์ฒญ(iframe ํ์ด์ง) ์๋ฃ")
|
| 62 |
if iframe_response.status_code != 200:
|
|
|
|
| 64 |
return f"iframe์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {iframe_response.status_code}"
|
| 65 |
iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
|
| 66 |
debug_log("HTML ํ์ฑ(iframe ํ์ด์ง) ์๋ฃ")
|
| 67 |
+
|
| 68 |
+
# 6) ์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ ์ถ์ถ
|
| 69 |
title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
|
| 70 |
title = title_div.get_text(strip=True) if title_div else "์ ๋ชฉ์ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 71 |
debug_log(f"์ถ์ถ๋ ์ ๋ชฉ: {title}")
|
|
|
|
| 75 |
else:
|
| 76 |
content = "๋ณธ๋ฌธ์ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 77 |
debug_log("๋ณธ๋ฌธ ์ถ์ถ ์๋ฃ")
|
| 78 |
+
|
| 79 |
result = f"[์ ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
|
| 80 |
+
debug_log("์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ์ ํฉ์ณ ๋ฐํ ์ค๋น ์๋ฃ")
|
| 81 |
return result
|
| 82 |
+
|
| 83 |
except Exception as e:
|
| 84 |
debug_log(f"์๋ฌ ๋ฐ์: {str(e)}")
|
| 85 |
return f"์คํฌ๋ํ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
| 86 |
|
| 87 |
+
# [์ฐธ์กฐ์ฝ๋-1] ํํ์ ๋ถ์ ๊ธฐ๋ฅ
|
| 88 |
def analyze_text(text: str):
|
| 89 |
logging.basicConfig(level=logging.DEBUG)
|
| 90 |
logger = logging.getLogger(__name__)
|
| 91 |
logger.debug("์๋ณธ ํ
์คํธ: %s", text)
|
| 92 |
+
|
| 93 |
+
# 1. ํ๊ตญ์ด๋ง ๋จ๊ธฐ๊ธฐ (๊ณต๋ฐฑ, ์์ด, ๊ธฐํธ ๋ฑ ์ ๊ฑฐ)
|
| 94 |
filtered_text = re.sub(r'[^๊ฐ-ํฃ]', '', text)
|
| 95 |
+
logger.debug("ํํฐ๋ง๋ ํ
์คํธ (ํ๊ตญ์ด๋ง, ๊ณต๋ฐฑ ์ ๊ฑฐ): %s", filtered_text)
|
| 96 |
+
|
| 97 |
if not filtered_text:
|
| 98 |
logger.debug("์ ํจํ ํ๊ตญ์ด ํ
์คํธ๊ฐ ์์.")
|
| 99 |
return pd.DataFrame(columns=["๋จ์ด", "๋น๋์"]), ""
|
| 100 |
+
|
| 101 |
+
# 2. Mecab์ ์ด์ฉํ ํํ์ ๋ถ์ (๋ช
์ฌ์ ๋ณตํฉ๋ช
์ฌ๋ง ์ถ์ถ)
|
| 102 |
mecab_instance = mecab.MeCab()
|
| 103 |
tokens = mecab_instance.pos(filtered_text)
|
| 104 |
logger.debug("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ: %s", tokens)
|
| 105 |
+
|
| 106 |
freq = {}
|
| 107 |
for word, pos in tokens:
|
| 108 |
+
if word and word.strip():
|
| 109 |
+
if pos.startswith("NN"):
|
| 110 |
+
freq[word] = freq.get(word, 0) + 1
|
| 111 |
+
logger.debug("๋จ์ด: %s, ํ์ฌ: %s, ํ์ฌ ๋น๋: %d", word, pos, freq[word])
|
| 112 |
+
|
| 113 |
+
# 3. ๋น๋์๋ฅผ ๋ด๋ฆผ์ฐจ์ ์ ๋ ฌ
|
| 114 |
sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
|
| 115 |
+
logger.debug("๋ด๋ฆผ์ฐจ์ ์ ๋ ฌ๋ ๋จ์ด ๋น๋: %s", sorted_freq)
|
| 116 |
+
|
| 117 |
+
# 4. ๊ฒฐ๊ณผ DataFrame ์์ฑ
|
| 118 |
df = pd.DataFrame(sorted_freq, columns=["๋จ์ด", "๋น๋์"])
|
| 119 |
+
logger.debug("๊ฒฐ๊ณผ DataFrame ์์ฑ๋จ, shape: %s", df.shape)
|
| 120 |
+
|
| 121 |
+
# 5. Excel ํ์ผ ์์ฑ (์์ ํ์ผ)
|
| 122 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
|
| 123 |
df.to_excel(temp_file.name, index=False, engine='openpyxl')
|
| 124 |
temp_file.close()
|
| 125 |
logger.debug("Excel ํ์ผ ์์ฑ๋จ: %s", temp_file.name)
|
| 126 |
+
|
| 127 |
return df, temp_file.name
|
| 128 |
|
| 129 |
+
# [์ฐธ์กฐ์ฝ๋-2] ๋ค์ด๋ฒ ๊ด๊ณ API ๋ฐ ๊ฒ์๋/๋ธ๋ก๊ทธ๋ฌธ์์ ์กฐํ ๊ธฐ๋ฅ
|
| 130 |
def generate_signature(timestamp, method, uri, secret_key):
|
| 131 |
message = f"{timestamp}.{method}.{uri}"
|
| 132 |
digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
|
|
|
|
| 148 |
API_KEY = os.environ["NAVER_API_KEY"]
|
| 149 |
SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
|
| 150 |
CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
|
| 151 |
+
|
| 152 |
BASE_URL = "https://api.naver.com"
|
| 153 |
uri = "/keywordstool"
|
| 154 |
method = "GET"
|
|
|
|
| 164 |
df = pd.DataFrame(data["keywordList"])
|
| 165 |
if len(df) > 100:
|
| 166 |
df = df.head(100)
|
| 167 |
+
|
| 168 |
def parse_count(x):
|
| 169 |
try:
|
| 170 |
return int(str(x).replace(",", ""))
|
| 171 |
except:
|
| 172 |
return 0
|
| 173 |
+
|
| 174 |
df["PC์๊ฒ์๋"] = df["monthlyPcQcCnt"].apply(parse_count)
|
| 175 |
df["๋ชจ๋ฐ์ผ์๊ฒ์๋"] = df["monthlyMobileQcCnt"].apply(parse_count)
|
| 176 |
df["ํ ํ์๊ฒ์๋"] = df["PC์๊ฒ์๋"] + df["๋ชจ๋ฐ์ผ์๊ฒ์๋"]
|
|
|
|
| 209 |
debug_log(f"process_keyword ํธ์ถ, ํค์๋๋ค: {keywords}, ์ฐ๊ด๊ฒ์์ด ํฌํจ: {include_related}")
|
| 210 |
input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
|
| 211 |
result_dfs = []
|
| 212 |
+
|
| 213 |
for idx, kw in enumerate(input_keywords):
|
| 214 |
df_kw = fetch_related_keywords(kw)
|
| 215 |
if df_kw.empty:
|
|
|
|
| 223 |
df_related = df_kw[df_kw["์ ๋ณดํค์๋"] != kw]
|
| 224 |
if not df_related.empty:
|
| 225 |
result_dfs.append(df_related)
|
| 226 |
+
|
| 227 |
if result_dfs:
|
| 228 |
result_df = pd.concat(result_dfs, ignore_index=True)
|
| 229 |
result_df.drop_duplicates(subset=["์ ๋ณดํค์๋"], inplace=True)
|
| 230 |
else:
|
| 231 |
result_df = pd.DataFrame(columns=["์ ๋ณดํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋"])
|
| 232 |
+
|
| 233 |
result_df["๋ธ๋ก๊ทธ๋ฌธ์์"] = result_df["์ ๋ณดํค์๋"].apply(fetch_blog_count)
|
| 234 |
result_df.sort_values(by="ํ ํ์๊ฒ์๋", ascending=False, inplace=True)
|
| 235 |
debug_log("process_keyword ์๋ฃ")
|
| 236 |
return result_df, create_excel_file(result_df)
|
| 237 |
|
| 238 |
+
# ์๋ก์ด ๊ธฐ๋ฅ: '๋ธ๋ก๊ทธ๋ด์ฉ๊ฐ์ ธ์ค๊ธฐ' ์คํ ์ ๋ธ๋ก๊ทธ ๋งํฌ๋ก๋ถํฐ ์ ๋ชฉ/๋ณธ๋ฌธ ์คํฌ๋ํ
|
| 239 |
+
def fetch_blog_content(url: str):
|
| 240 |
+
debug_log("fetch_blog_content ํจ์ ์์")
|
| 241 |
+
content = scrape_naver_blog(url)
|
| 242 |
+
debug_log("fetch_blog_content ํจ์ ์๋ฃ")
|
| 243 |
+
return content
|
| 244 |
+
|
| 245 |
+
# ์๋ก์ด ๊ธฐ๋ฅ: ํํ์ ๋ถ์ ๋ฐ ๊ฒ์๋, ๋ธ๋ก๊ทธ๋ฌธ์์ ์ถ๊ฐ
|
| 246 |
+
def morphological_analysis_and_enrich(text: str):
|
| 247 |
debug_log("morphological_analysis_and_enrich ํจ์ ์์")
|
| 248 |
df_freq, _ = analyze_text(text)
|
| 249 |
if df_freq.empty:
|
| 250 |
debug_log("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ๊ฐ ๋น ๋ฐ์ดํฐํ๋ ์์
๋๋ค.")
|
| 251 |
return df_freq, ""
|
| 252 |
+
|
| 253 |
+
# ํํ์ ๋ถ์ ๊ฒฐ๊ณผ์์ ํค์๋ ์ถ์ถ (๊ฐ ๋จ์ด๋ฅผ ์ํฐ๋ก ๊ตฌ๋ถ)
|
|
|
|
|
|
|
| 254 |
keywords = "\n".join(df_freq["๋จ์ด"].tolist())
|
| 255 |
debug_log(f"๋ถ์๋ ํค์๋: {keywords}")
|
| 256 |
+
|
| 257 |
+
# [์ฐธ์กฐ์ฝ๋-2]๋ฅผ ํ์ฉํ์ฌ ๊ฐ ํค์๋์ ๊ฒ์๋ ๋ฐ ๋ธ๋ก๊ทธ๋ฌธ์์ ์กฐํ (์ฐ๊ด๊ฒ์์ด ๋ฏธํฌํจ)
|
| 258 |
df_keyword_info, _ = process_keyword(keywords, include_related=False)
|
| 259 |
debug_log("๊ฒ์๋ ๋ฐ ๋ธ๋ก๊ทธ๋ฌธ์์ ์กฐํ ์๋ฃ")
|
| 260 |
+
|
| 261 |
+
# ํํ์ ๋ถ์ ๊ฒฐ๊ณผ์ ๊ฒ์๋ ์ ๋ณด๋ฅผ ๋ณํฉ (ํค์๋ ๊ธฐ์ค)
|
| 262 |
merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋จ์ด", right_on="์ ๋ณดํค์๋", how="left")
|
| 263 |
merged_df.drop(columns=["์ ๋ณดํค์๋"], inplace=True)
|
| 264 |
+
|
| 265 |
+
# ๋ณํฉ ๊ฒฐ๊ณผ Excel ํ์ผ ์์ฑ
|
| 266 |
merged_excel_path = create_excel_file(merged_df)
|
| 267 |
debug_log("morphological_analysis_and_enrich ํจ์ ์๋ฃ")
|
| 268 |
return merged_df, merged_excel_path
|
| 269 |
|
| 270 |
+
# Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ (Hugging Face Spaces ํ๊ฒฝ์ ์ ํฉ)
|
| 271 |
+
with gr.Blocks(title="๋ธ๋ก๊ทธ๊ธ ํํ์ ๋ถ์ ์คํ์ด์ค", css=".gradio-container { max-width: 960px; margin: auto; }") as demo:
|
| 272 |
+
gr.Markdown("# ๋ธ๋ก๊ทธ๊ธ ํํ์ ๋ถ์ ์คํ์ด์ค")
|
| 273 |
+
|
| 274 |
+
with gr.Tab("๋ธ๋ก๊ทธ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
with gr.Row():
|
| 276 |
blog_url_input = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ", placeholder="์: https://blog.naver.com/ssboost/222983068507", lines=1)
|
| 277 |
+
fetch_button = gr.Button("๋ธ๋ก๊ทธ๋ด์ฉ๊ฐ์ ธ์ค๊ธฐ")
|
| 278 |
+
blog_content = gr.Textbox(label="๋ธ๋ก๊ทธ ๋ด์ฉ", lines=10, placeholder="๋ธ๋ก๊ทธ ๋ด์ฉ์ ๊ฐ์ ธ์ค๊ฑฐ๋ ์ง์ ์
๋ ฅํ์ธ์.")
|
| 279 |
+
fetch_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content)
|
| 280 |
+
|
| 281 |
+
with gr.Tab("ํํ์ ๋ถ์"):
|
| 282 |
+
with gr.Row():
|
| 283 |
+
analysis_input = gr.Textbox(label="๋ถ์ํ ํ
์คํธ", lines=10, placeholder="๋ถ์ํ ํ
์คํธ๋ฅผ ์
๋ ฅํ๊ฑฐ๋ '๋ธ๋ก๊ทธ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ'์์ ๊ฐ์ ธ์จ ๋ด์ฉ์ ์์ ํ์ธ์.")
|
| 284 |
with gr.Row():
|
| 285 |
+
analyze_button = gr.Button("ํํ์๋ถ์")
|
| 286 |
with gr.Row():
|
| 287 |
+
analysis_result = gr.Dataframe(label="๋ถ์ ๊ฒฐ๊ณผ (๋จ์ด, ๋น๋์, ๊ฒ์๋, ๋ธ๋ก๊ทธ๋ฌธ์์ ๋ฑ)")
|
| 288 |
with gr.Row():
|
| 289 |
+
analysis_excel = gr.File(label="Excel ๋ค์ด๋ก๋")
|
| 290 |
+
analyze_button.click(fn=morphological_analysis_and_enrich, inputs=analysis_input, outputs=[analysis_result, analysis_excel])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
if __name__ == "__main__":
|
| 293 |
debug_log("Gradio ์ฑ ์คํ ์์")
|
| 294 |
demo.launch()
|
| 295 |
+
debug_log("Gradio ์ฑ ์คํ ์ข
๋ฃ")
|