gooookim committed on
Commit
441caa0
·
verified ·
1 Parent(s): 4ebe6d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -50
app.py CHANGED
@@ -350,6 +350,7 @@ def expand_synonyms(keywords: List[str]) -> List[List[str]]:
350
 
351
  return merged
352
 
 
353
  def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
354
  """
355
  검색어 정규화 + 의미 중복 제거:
@@ -384,7 +385,6 @@ def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
384
  return final
385
 
386
 
387
-
388
  def build_queries(
389
  sentence: str,
390
  selected_keywords: List[str],
@@ -401,7 +401,7 @@ def build_queries(
401
  if not selected_keywords:
402
  selected_keywords = extract_candidates(sentence, max_candidates=10)[:4]
403
 
404
- selected_keywords = _dedup_keywords_preserve_order(selected_keywords) #gk
405
 
406
  # Q1: 기본 AND
407
  q1 = " ".join(selected_keywords).strip()
@@ -420,7 +420,7 @@ def build_queries(
420
  # g[0]은 원문 유지, g[1]이 있으면 치환된 후보를 우선 사용
421
  combo.append(g[1] if len(g) >= 2 else g[0])
422
 
423
- combo = _dedup_keywords_preserve_order(combo) #gk
424
  q2 = " ".join(combo).strip()
425
 
426
  # Q2가 Q1과 다르고, 비어있지 않으면 추가
@@ -482,39 +482,25 @@ def aggregate_search(
482
  sort: str,
483
  ) -> Tuple[List[str], List[Dict[str, Any]]]:
484
  """
485
- 문장 입력 -> 키워드 추출/확장 -> 쿼리 다중 호출 -> 통합/중복제거 -> 재랭킹
486
  반환: (생성된 쿼리 목록, 최종 아이템 목록)
487
  """
488
- # 1) 후보 생성(규칙/통계)
489
- candidates = extract_candidates(sentence, max_candidates=60)
490
-
491
- # 2) 임베딩으로 상위 키워드 선별
492
- selected = select_keywords_by_embedding(sentence, candidates, top_n=10)
493
-
494
- # 3) 제외어 감지
495
- excludes = [] # detect_excludes(sentence)
496
 
497
- # 4) 다중 쿼리 생성
498
- queries = build_queries(sentence, selected, excludes, max_queries=2)
499
-
500
- # 5) ์ฟผ๋ฆฌ๋ณ„ ํ˜ธ์ถœ (๊ฐ ์ฟผ๋ฆฌ๋‹น display๋ฅผ ๋‚˜๋ˆ„์–ด ํ• ๋‹นํ•˜์—ฌ ํญ์ฃผ ๋ฐฉ์ง€)
501
- # 예: display=20, queries=4 -> 각 5개씩
502
- # 단, 최소 3개는 유지(너무 적으면 의미가 떨어짐)
503
- per_q = max(3, int(np.ceil(display / max(1, len(queries)))))
504
- per_q = min(per_q, display)
505
 
506
  all_items: List[Dict[str, Any]] = []
507
  for q in queries:
508
- data = naver_news_search(query=q, display=per_q, sort=sort, start=1)
509
  all_items.extend(data.get("items", []))
510
 
511
- # 6) 통합/중복 제거
512
  merged = dedup_items(all_items)
513
 
514
- # 7) 임베딩 재랭킹
515
  reranked = rerank_items_by_embedding(sentence, merged)
516
 
517
- # 8) 최종 개수 절단
518
  final_items = reranked[:display]
519
  return queries, final_items
520
 
@@ -564,16 +550,15 @@ def handle_search(
564
 
565
  try:
566
  queries, items = aggregate_search(sentence=q, display=int(display), sort=sort)
567
-
568
  lines = []
569
- # lines.append(f"질의: {q}")
570
  lines.append("")
571
  lines.append("API 호출에 사용된 검색어(query)는 다음과 같습니다:")
572
  for i, qq in enumerate(queries, start=1):
573
  lines.append(f"- Q{i}: `{qq}`")
574
  lines.append("")
575
  lines.append(render_results_from_items(items))
576
-
577
  assistant_text = "\n".join(lines).strip()
578
 
579
  except Exception as e:
@@ -585,39 +570,17 @@ def handle_search(
585
 
586
 
587
  with gr.Blocks(title="Naver News Search (Chat UI)") as demo:
588
- # gr.Markdown(
589
- # """
590
- # # 네이버 뉴스 검색 테스트 (문장 입력 → 키워드 추출/확장 → 다중 쿼리 호출)
591
- # - 하단 입력창에 **문장 형태**로 입력하시면, (규칙/통계 + 임베딩)으로 키워드를 선별·확장하고,
592
- # AND/OR/์ œ์™ธ ํ˜•ํƒœ์˜ **๋‹ค์ค‘ ์ฟผ๋ฆฌ**๋ฅผ ์ž๋™ ์ƒ์„ฑํ•˜์—ฌ ๋„ค์ด๋ฒ„ ๋‰ด์Šค ๊ฒ€์ƒ‰ API๋ฅผ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค.
593
- # - 인증키는 Hugging Face Spaces **Secrets**의 `NAVER_CLIENT_ID`, `NAVER_CLIENT_SECRET`을 사용합니다.
594
- # - 임베딩 모델은 기본 `jhgan/ko-sroberta-multitask`이며, 필요 시 Secrets/환경변수 `EMBEDDING_MODEL`로 변경 가능합니다.
595
- # """
596
- # )
597
-
598
- # with gr.Row():
599
- # display = gr.Slider(
600
- # minimum=1, maximum=50, value=20, step=1, label="표시 개수(display)"
601
- # )
602
- # sort = gr.Dropdown(
603
- # choices=["date", "sim"],
604
- # value="date",
605
- # label="정렬(sort)",
606
- # info="date=최신순, sim=정확도순",
607
- # )
608
-
609
  with gr.Accordion("검색 옵션", open=False):
610
  with gr.Row():
611
  display = gr.Slider(
612
  minimum=1, maximum=100, value=20, step=1, label="표시 개수(display)"
613
  )
614
  sort = gr.Dropdown(
615
- choices=[("최신순", "date"), ("정확도순(연관도순)", "sim"),],
616
  value="date",
617
  label="정렬(sort)",
618
  )
619
 
620
-
621
  chatbot = gr.Chatbot(
622
  value=[],
623
  label="NewsChat_v0.1",
 
350
 
351
  return merged
352
 
353
+
354
  def _dedup_keywords_preserve_order(keywords: List[str]) -> List[str]:
355
  """
356
  검색어 정규화 + 의미 중복 제거:
 
385
  return final
386
 
387
 
 
388
  def build_queries(
389
  sentence: str,
390
  selected_keywords: List[str],
 
401
  if not selected_keywords:
402
  selected_keywords = extract_candidates(sentence, max_candidates=10)[:4]
403
 
404
+ selected_keywords = _dedup_keywords_preserve_order(selected_keywords) # gk
405
 
406
  # Q1: 기본 AND
407
  q1 = " ".join(selected_keywords).strip()
 
420
  # g[0]은 원문 유지, g[1]이 있으면 치환된 후보를 우선 사용
421
  combo.append(g[1] if len(g) >= 2 else g[0])
422
 
423
+ combo = _dedup_keywords_preserve_order(combo) # gk
424
  q2 = " ".join(combo).strip()
425
 
426
  # Q2가 Q1과 다르고, 비어있지 않으면 추가
 
482
  sort: str,
483
  ) -> Tuple[List[str], List[Dict[str, Any]]]:
484
  """
485
+ ๋ฌธ์žฅ ์ž…๋ ฅ -> (ํ˜„์žฌ๋Š”) ์‚ฌ์šฉ์ž ์ž…๋ ฅ ๋ฌธ์žฅ์„ ๊ทธ๋Œ€๋กœ query๋กœ ์‚ฌ์šฉํ•˜์—ฌ API ํ˜ธ์ถœ
486
  반환: (생성된 쿼리 목록, 최종 아이템 목록)
487
  """
 
 
 
 
 
 
 
 
488
 
489
+ # โœ… ๋ณ€๊ฒฝ๋œ ํ•ต์‹ฌ: ์‚ฌ์šฉ์ž ์ž…๋ ฅ ๋ฌธ์žฅ์„ ๊ทธ๋Œ€๋กœ query๋กœ ์‚ฌ์šฉ
490
+ queries = [sentence]
 
 
 
 
 
 
491
 
492
  all_items: List[Dict[str, Any]] = []
493
  for q in queries:
494
+ data = naver_news_search(query=q, display=int(display), sort=sort, start=1)
495
  all_items.extend(data.get("items", []))
496
 
497
+ # 통합/중복 제거(단일 쿼리라도 유지)
498
  merged = dedup_items(all_items)
499
 
500
+ # 임베딩 재랭킹(기존 동작 유지)
501
  reranked = rerank_items_by_embedding(sentence, merged)
502
 
503
+ # 최종 개수 절단
504
  final_items = reranked[:display]
505
  return queries, final_items
506
 
 
550
 
551
  try:
552
  queries, items = aggregate_search(sentence=q, display=int(display), sort=sort)
553
+
554
  lines = []
 
555
  lines.append("")
556
  lines.append("API 호출에 사용된 검색어(query)는 다음과 같습니다:")
557
  for i, qq in enumerate(queries, start=1):
558
  lines.append(f"- Q{i}: `{qq}`")
559
  lines.append("")
560
  lines.append(render_results_from_items(items))
561
+
562
  assistant_text = "\n".join(lines).strip()
563
 
564
  except Exception as e:
 
570
 
571
 
572
  with gr.Blocks(title="Naver News Search (Chat UI)") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  with gr.Accordion("검색 옵션", open=False):
574
  with gr.Row():
575
  display = gr.Slider(
576
  minimum=1, maximum=100, value=20, step=1, label="표시 개수(display)"
577
  )
578
  sort = gr.Dropdown(
579
+ choices=[("최신순", "date"), ("정확도순(연관도순)", "sim")],
580
  value="date",
581
  label="정렬(sort)",
582
  )
583
 
 
584
  chatbot = gr.Chatbot(
585
  value=[],
586
  label="NewsChat_v0.1",