Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -350,6 +350,7 @@ def expand_synonyms(keywords: List[str]) -> List[List[str]]:
|
|
| 350 |
|
| 351 |
return merged
|
| 352 |
|
|
|
|
| 353 |
def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
|
| 354 |
"""
|
| 355 |
검색어 정규화 + 의미 중복 제거:
|
|
@@ -384,7 +385,6 @@ def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
|
|
| 384 |
return final
|
| 385 |
|
| 386 |
|
| 387 |
-
|
| 388 |
def build_queries(
|
| 389 |
sentence: str,
|
| 390 |
selected_keywords: List[str],
|
|
@@ -401,7 +401,7 @@ def build_queries(
|
|
| 401 |
if not selected_keywords:
|
| 402 |
selected_keywords = extract_candidates(sentence, max_candidates=10)[:4]
|
| 403 |
|
| 404 |
-
selected_keywords = _dedup_keywords_preserve_order(selected_keywords)
|
| 405 |
|
| 406 |
# Q1: 기본 AND
|
| 407 |
q1 = " ".join(selected_keywords).strip()
|
|
@@ -420,7 +420,7 @@ def build_queries(
|
|
| 420 |
# g[0]은 원문 유지, g[1]이 있으면 치환된 후보를 우선 사용
|
| 421 |
combo.append(g[1] if len(g) >= 2 else g[0])
|
| 422 |
|
| 423 |
-
combo = _dedup_keywords_preserve_order(combo)
|
| 424 |
q2 = " ".join(combo).strip()
|
| 425 |
|
| 426 |
# Q2가 Q1과 다르고, 비어있지 않으면 추가
|
|
@@ -482,39 +482,25 @@ def aggregate_search(
|
|
| 482 |
sort: str,
|
| 483 |
) -> Tuple[List[str], List[Dict[str, Any]]]:
|
| 484 |
"""
|
| 485 |
-
문장 입력 ->
|
| 486 |
반환: (생성된 쿼리 목록, 최종 아이템 목록)
|
| 487 |
"""
|
| 488 |
-
# 1) 후보 생성(규칙/통계)
|
| 489 |
-
candidates = extract_candidates(sentence, max_candidates=60)
|
| 490 |
-
|
| 491 |
-
# 2) 임베딩으로 상위 키워드 선별
|
| 492 |
-
selected = select_keywords_by_embedding(sentence, candidates, top_n=10)
|
| 493 |
-
|
| 494 |
-
# 3) 제외어 감지
|
| 495 |
-
excludes = [] # detect_excludes(sentence)
|
| 496 |
|
| 497 |
-
#
|
| 498 |
-
queries =
|
| 499 |
-
|
| 500 |
-
# 5) ์ฟผ๋ฆฌ๋ณ ํธ์ถ (๊ฐ ์ฟผ๋ฆฌ๋น display๋ฅผ ๋๋์ด ํ ๋นํ์ฌ ํญ์ฃผ ๋ฐฉ์ง)
|
| 501 |
-
# 예: display=20, queries=4 -> 각 5개씩
|
| 502 |
-
# 단, 최소 3개는 유지(너무 적으면 의미가 떨어짐)
|
| 503 |
-
per_q = max(3, int(np.ceil(display / max(1, len(queries)))))
|
| 504 |
-
per_q = min(per_q, display)
|
| 505 |
|
| 506 |
all_items: List[Dict[str, Any]] = []
|
| 507 |
for q in queries:
|
| 508 |
-
data = naver_news_search(query=q, display=
|
| 509 |
all_items.extend(data.get("items", []))
|
| 510 |
|
| 511 |
-
#
|
| 512 |
merged = dedup_items(all_items)
|
| 513 |
|
| 514 |
-
#
|
| 515 |
reranked = rerank_items_by_embedding(sentence, merged)
|
| 516 |
|
| 517 |
-
#
|
| 518 |
final_items = reranked[:display]
|
| 519 |
return queries, final_items
|
| 520 |
|
|
@@ -564,16 +550,15 @@ def handle_search(
|
|
| 564 |
|
| 565 |
try:
|
| 566 |
queries, items = aggregate_search(sentence=q, display=int(display), sort=sort)
|
| 567 |
-
|
| 568 |
lines = []
|
| 569 |
-
# lines.append(f"질의: {q}")
|
| 570 |
lines.append("")
|
| 571 |
lines.append("API ํธ์ถ์ ์ฌ์ฉ๋ ๊ฒ์์ด(query)๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค:")
|
| 572 |
for i, qq in enumerate(queries, start=1):
|
| 573 |
lines.append(f"- Q{i}: `{qq}`")
|
| 574 |
lines.append("")
|
| 575 |
lines.append(render_results_from_items(items))
|
| 576 |
-
|
| 577 |
assistant_text = "\n".join(lines).strip()
|
| 578 |
|
| 579 |
except Exception as e:
|
|
@@ -585,39 +570,17 @@ def handle_search(
|
|
| 585 |
|
| 586 |
|
| 587 |
with gr.Blocks(title="Naver News Search (Chat UI)") as demo:
|
| 588 |
-
# gr.Markdown(
|
| 589 |
-
# """
|
| 590 |
-
# # ๋ค์ด๋ฒ ๋ด์ค ๊ฒ์ ํ
์คํธ (๋ฌธ์ฅ ์
๋ ฅ โ ํค์๋ ์ถ์ถ/ํ์ฅ โ ๋ค์ค ์ฟผ๋ฆฌ ํธ์ถ)
|
| 591 |
-
# - ํ๋จ ์
๋ ฅ์ฐฝ์ **๋ฌธ์ฅ ํํ**๋ก ์
๋ ฅํ์๋ฉด, (๊ท์น/ํต๊ณ + ์๋ฒ ๋ฉ)์ผ๋ก ํค์๋๋ฅผ ์ ๋ณยทํ์ฅํ๊ณ ,
|
| 592 |
-
# AND/OR/์ ์ธ ํํ์ **๋ค์ค ์ฟผ๋ฆฌ**๋ฅผ ์๋ ์์ฑํ์ฌ ๋ค์ด๋ฒ ๋ด์ค ๊ฒ์ API๋ฅผ ํธ์ถํฉ๋๋ค.
|
| 593 |
-
# - ์ธ์ฆํค๋ Hugging Face Spaces **Secrets**์ `NAVER_CLIENT_ID`, `NAVER_CLIENT_SECRET`์ ์ฌ์ฉํฉ๋๋ค.
|
| 594 |
-
# - ์๋ฒ ๋ฉ ๋ชจ๋ธ์ ๊ธฐ๋ณธ `jhgan/ko-sroberta-multitask`์ด๋ฉฐ, ํ์ ์ Secrets/ํ๊ฒฝ๋ณ์ `EMBEDDING_MODEL`๋ก ๋ณ๊ฒฝ ๊ฐ๋ฅํฉ๋๋ค.
|
| 595 |
-
# """
|
| 596 |
-
# )
|
| 597 |
-
|
| 598 |
-
# with gr.Row():
|
| 599 |
-
# display = gr.Slider(
|
| 600 |
-
# minimum=1, maximum=50, value=20, step=1, label="ํ์ ๊ฐ์(display)"
|
| 601 |
-
# )
|
| 602 |
-
# sort = gr.Dropdown(
|
| 603 |
-
# choices=["date", "sim"],
|
| 604 |
-
# value="date",
|
| 605 |
-
# label="์ ๋ ฌ(sort)",
|
| 606 |
-
# info="date=์ต์ ์, sim=์ ํ๋์",
|
| 607 |
-
# )
|
| 608 |
-
|
| 609 |
with gr.Accordion("๊ฒ์ ์ต์
", open=False):
|
| 610 |
with gr.Row():
|
| 611 |
display = gr.Slider(
|
| 612 |
minimum=1, maximum=100, value=20, step=1, label="ํ์ ๊ฐ์(display)"
|
| 613 |
)
|
| 614 |
sort = gr.Dropdown(
|
| 615 |
-
choices=[("์ต์ ์", "date"), ("์ ํ๋์(์ฐ๊ด๋์)", "sim")
|
| 616 |
value="date",
|
| 617 |
label="์ ๋ ฌ(sort)",
|
| 618 |
)
|
| 619 |
|
| 620 |
-
|
| 621 |
chatbot = gr.Chatbot(
|
| 622 |
value=[],
|
| 623 |
label="NewsChat_v0.1",
|
|
|
|
| 350 |
|
| 351 |
return merged
|
| 352 |
|
| 353 |
+
|
| 354 |
def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
|
| 355 |
"""
|
| 356 |
검색어 정규화 + 의미 중복 제거:
|
|
|
|
| 385 |
return final
|
| 386 |
|
| 387 |
|
|
|
|
| 388 |
def build_queries(
|
| 389 |
sentence: str,
|
| 390 |
selected_keywords: List[str],
|
|
|
|
| 401 |
if not selected_keywords:
|
| 402 |
selected_keywords = extract_candidates(sentence, max_candidates=10)[:4]
|
| 403 |
|
| 404 |
+
selected_keywords = _dedup_keywords_preserve_order(selected_keywords) # gk
|
| 405 |
|
| 406 |
# Q1: 기본 AND
|
| 407 |
q1 = " ".join(selected_keywords).strip()
|
|
|
|
| 420 |
# g[0]은 원문 유지, g[1]이 있으면 치환된 후보를 우선 사용
|
| 421 |
combo.append(g[1] if len(g) >= 2 else g[0])
|
| 422 |
|
| 423 |
+
combo = _dedup_keywords_preserve_order(combo) # gk
|
| 424 |
q2 = " ".join(combo).strip()
|
| 425 |
|
| 426 |
# Q2가 Q1과 다르고, 비어있지 않으면 추가
|
|
|
|
| 482 |
sort: str,
|
| 483 |
) -> Tuple[List[str], List[Dict[str, Any]]]:
|
| 484 |
"""
|
| 485 |
+
๋ฌธ์ฅ ์
๋ ฅ -> (ํ์ฌ๋) ์ฌ์ฉ์ ์
๋ ฅ ๋ฌธ์ฅ์ ๊ทธ๋๋ก query๋ก ์ฌ์ฉํ์ฌ API ํธ์ถ
|
| 486 |
반환: (생성된 쿼리 목록, 최종 아이템 목록)
|
| 487 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
+
# โ
๋ณ๊ฒฝ๋ ํต์ฌ: ์ฌ์ฉ์ ์
๋ ฅ ๋ฌธ์ฅ์ ๊ทธ๋๋ก query๋ก ์ฌ์ฉ
|
| 490 |
+
queries = [sentence]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
|
| 492 |
all_items: List[Dict[str, Any]] = []
|
| 493 |
for q in queries:
|
| 494 |
+
data = naver_news_search(query=q, display=int(display), sort=sort, start=1)
|
| 495 |
all_items.extend(data.get("items", []))
|
| 496 |
|
| 497 |
+
# 통합/중복 제거(단일 쿼리라도 유지)
|
| 498 |
merged = dedup_items(all_items)
|
| 499 |
|
| 500 |
+
# 임베딩 재랭킹(기존 동작 유지)
|
| 501 |
reranked = rerank_items_by_embedding(sentence, merged)
|
| 502 |
|
| 503 |
+
# 최종 개수 절단
|
| 504 |
final_items = reranked[:display]
|
| 505 |
return queries, final_items
|
| 506 |
|
|
|
|
| 550 |
|
| 551 |
try:
|
| 552 |
queries, items = aggregate_search(sentence=q, display=int(display), sort=sort)
|
| 553 |
+
|
| 554 |
lines = []
|
|
|
|
| 555 |
lines.append("")
|
| 556 |
lines.append("API ํธ์ถ์ ์ฌ์ฉ๋ ๊ฒ์์ด(query)๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค:")
|
| 557 |
for i, qq in enumerate(queries, start=1):
|
| 558 |
lines.append(f"- Q{i}: `{qq}`")
|
| 559 |
lines.append("")
|
| 560 |
lines.append(render_results_from_items(items))
|
| 561 |
+
|
| 562 |
assistant_text = "\n".join(lines).strip()
|
| 563 |
|
| 564 |
except Exception as e:
|
|
|
|
| 570 |
|
| 571 |
|
| 572 |
with gr.Blocks(title="Naver News Search (Chat UI)") as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
with gr.Accordion("๊ฒ์ ์ต์
", open=False):
|
| 574 |
with gr.Row():
|
| 575 |
display = gr.Slider(
|
| 576 |
minimum=1, maximum=100, value=20, step=1, label="ํ์ ๊ฐ์(display)"
|
| 577 |
)
|
| 578 |
sort = gr.Dropdown(
|
| 579 |
+
choices=[("์ต์ ์", "date"), ("์ ํ๋์(์ฐ๊ด๋์)", "sim")],
|
| 580 |
value="date",
|
| 581 |
label="์ ๋ ฌ(sort)",
|
| 582 |
)
|
| 583 |
|
|
|
|
| 584 |
chatbot = gr.Chatbot(
|
| 585 |
value=[],
|
| 586 |
label="NewsChat_v0.1",
|