# app.py — "Update app.py" by unnastyle (commit 92c0854, verified)
import gradio as gr
from bs4 import BeautifulSoup
import re
import logging
import requests
# Debug-level logging for the whole app (format included so output from all modules is uniform)
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
# ---------- [Module 1: original base code] start ----------
# English month name -> Korean month label ("1์›”" .. "12์›”")
month_mapping = {
    name: f"{num}์›”"
    for num, name in enumerate(
        ["January", "February", "March", "April", "May", "June",
         "July", "August", "September", "October", "November", "December"],
        start=1,
    )
}

def convert_date_range(date_range_str):
    """
    Convert a date range such as '6 January - 12 January' into the
    Korean form '1์›” 6์ผ ~ 1์›” 12์ผ'.

    Returns the input unchanged when it does not match the expected
    '<day> <Month> - <day> <Month>' shape. Unknown month names are
    passed through as-is.
    """
    logging.debug("์›๋ณธ ๋‚ ์งœ ๋ฒ”์œ„: %s", date_range_str)
    halves = date_range_str.split('-')
    if len(halves) != 2:
        logging.debug("๋‚ ์งœ ๋ฒ”์œ„ ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Œ: %s", date_range_str)
        return date_range_str
    start_tokens = halves[0].split()  # e.g. ["6", "January"]
    end_tokens = halves[1].split()    # e.g. ["12", "January"]
    if len(start_tokens) < 2 or len(end_tokens) < 2:
        logging.debug("๋‚ ์งœ ๊ตฌ์„ฑ์š”์†Œ ๋ถ€์กฑ: %s, %s", halves[0].strip(), halves[1].strip())
        return date_range_str
    start_day, start_month_en = start_tokens[0], start_tokens[1]
    end_day, end_month_en = end_tokens[0], end_tokens[1]
    converted = "{} {}์ผ ~ {} {}์ผ".format(
        month_mapping.get(start_month_en, start_month_en), start_day,
        month_mapping.get(end_month_en, end_month_en), end_day,
    )
    logging.debug("๋ณ€ํ™˜๋œ ๋‚ ์งœ ๋ฒ”์œ„: %s", converted)
    return converted
def process_html(html_text):
    """
    [Original feature]
    Parse a full course page's HTML and return the course title plus a
    plain-text report of the video lectures grouped by section.

    Returns:
        (subject_name, sections_output): the <h1> text (or "") and a
        newline-separated listing of section headings and iframe URLs.
    """
    logging.debug("์ž…๋ ฅ HTML ์ฒ˜๋ฆฌ ์‹œ์ž‘")
    soup = BeautifulSoup(html_text, "html.parser")
    # 1. The course title comes from the page's <h1> tag.
    subject_name = ""
    heading = soup.find("h1")
    if heading:
        subject_name = heading.get_text(strip=True)
        logging.debug("์ถ”์ถœ๋œ ๊ณผ๋ชฉ๋ช…: %s", subject_name)
    else:
        logging.debug("h1 ํƒœ๊ทธ๋ฅผ ์ฐพ์ง€ ๋ชปํ•จ")
    # 2. Walk every <li id="section-N"> and collect its lectures.
    chunks = []
    sections = soup.find_all("li", id=re.compile(r"^section-\d+"))
    logging.debug("์ฐพ์€ ์„น์…˜ ๊ฐœ์ˆ˜: %d", len(sections))
    for sec in sections:
        sec_id = sec.get("id")
        logging.debug("์ฒ˜๋ฆฌ ์ค‘์ธ ์„น์…˜ ID: %s", sec_id)
        id_hit = re.search(r"section-(\d+)", sec_id)
        if not id_hit:
            continue
        number = int(id_hit.group(1))
        if number == 0:
            logging.debug("section-0 ์€ ์Šคํ‚ต")
            continue
        # section-1 is the intro; section-N (N >= 2) maps to week N-1.
        label = "Introduction" if number == 1 else f"{number - 1}์ฃผ์ฐจ"
        # Date range lives in the section header: <h3 class="sectionname"> > <a>.
        date_range_text = ""
        header = sec.find("h3", class_="sectionname")
        anchor = header.find("a") if header else None
        if anchor:
            header_text = anchor.get_text(strip=True)
            logging.debug("ํ—ค๋” ํ…์ŠคํŠธ: %s", header_text)
            date_hit = re.search(r'(\d+\s+[A-Za-z]+\s*-\s*\d+\s+[A-Za-z]+)', header_text)
            if date_hit:
                date_range_text = convert_date_range(date_hit.group(1))
            else:
                logging.debug("๋‚ ์งœ ๋ฒ”์œ„ ํŒจํ„ด ๋งค์นญ ์‹คํŒจ: %s", header_text)
        else:
            logging.debug("h3 ๋˜๋Š” h3 ๋‚ด a ํƒœ๊ทธ๋ฅผ ์ฐพ์ง€ ๋ชปํ•จ for section: %s", sec_id)
        # Intro never shows a date range; weekly sections show it when found.
        if number != 1 and date_range_text:
            chunks.append(f"์„น์…˜ : {label} ({date_range_text})\n")
        else:
            chunks.append(f"์„น์…˜ : {label}\n")
        # Lecture URLs come from iframe src attributes inside this section.
        frames = sec.find_all("iframe")
        logging.debug("์„น์…˜ %s ๋‚ด ์ฐพ์€ iframe ๊ฐœ์ˆ˜: %d", sec_id, len(frames))
        for idx, frame in enumerate(frames, start=1):
            src = frame.get("src", "").strip()
            if src:
                chunks.append(f"๊ฐ•์˜{idx} : {src}\n")
                logging.debug("์ถ”์ถœ๋œ ๋™์˜์ƒ ๊ฐ•์˜ URL: %s", src)
        chunks.append("\n")  # blank line between sections
    logging.debug("HTML ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
    return subject_name, "".join(chunks)
def process_html_sections(html_text):
    """
    Parse the full page HTML and return, for the Gradio UI:
      - the course title (<h1> text, or ""),
      - a dropdown update whose choices are the section titles,
      - a dict mapping each section title to (lecture-list text, URL list).
    """
    logging.debug("์ž…๋ ฅ HTML ์ฒ˜๋ฆฌ ์‹œ์ž‘ (์„น์…˜๋ณ„ ๋ถ„๋ฆฌ)")
    soup = BeautifulSoup(html_text, "html.parser")
    # 1. Course title
    subject_name = ""
    title_tag = soup.find("h1")
    if title_tag:
        subject_name = title_tag.get_text(strip=True)
        logging.debug("์ถ”์ถœ๋œ ๊ณผ๋ชฉ๋ช…: %s", subject_name)
    else:
        logging.debug("h1 ํƒœ๊ทธ๋ฅผ ์ฐพ์ง€ ๋ชปํ•จ")
    sections_dict = {}
    section_nodes = soup.find_all("li", id=re.compile(r"^section-\d+"))
    logging.debug("์ฐพ์€ ์„น์…˜ ๊ฐœ์ˆ˜: %d", len(section_nodes))
    for node in section_nodes:
        node_id = node.get("id")
        logging.debug("์ฒ˜๋ฆฌ ์ค‘์ธ ์„น์…˜ ID: %s", node_id)
        id_hit = re.search(r"section-(\d+)", node_id)
        if not id_hit:
            continue
        number = int(id_hit.group(1))
        if number == 0:
            logging.debug("section-0 ์€ ์Šคํ‚ต")
            continue
        # Label: section-1 is the intro, section-N (N >= 2) is week N-1.
        if number == 1:
            section_label = "์„น์…˜ : Introduction"
        else:
            section_label = f"์„น์…˜ : {number - 1}์ฃผ์ฐจ"
        # Date range from the section header (<h3 class="sectionname"> > <a>)
        date_range_text = ""
        header = node.find("h3", class_="sectionname")
        anchor = header.find("a") if header else None
        if anchor:
            header_text = anchor.get_text(strip=True)
            logging.debug("ํ—ค๋” ํ…์ŠคํŠธ: %s", header_text)
            date_hit = re.search(r'(\d+\s+[A-Za-z]+\s*-\s*\d+\s+[A-Za-z]+)', header_text)
            if date_hit:
                date_range_text = convert_date_range(date_hit.group(1))
            else:
                logging.debug("๋‚ ์งœ ๋ฒ”์œ„ ํŒจํ„ด ๋งค์นญ ์‹คํŒจ: %s", header_text)
        else:
            logging.debug("h3 ๋˜๋Š” h3 ๋‚ด a ํƒœ๊ทธ๋ฅผ ์ฐพ์ง€ ๋ชปํ•จ for section: %s", node_id)
        if number != 1 and date_range_text:
            section_label += f" ({date_range_text})"
        # Lecture text lines and the parallel URL list from iframe src values
        lecture_lines = []
        url_list = []
        frames = node.find_all("iframe")
        logging.debug("์„น์…˜ %s ๋‚ด ์ฐพ์€ iframe ๊ฐœ์ˆ˜: %d", node_id, len(frames))
        for idx, frame in enumerate(frames, start=1):
            src = frame.get("src", "").strip()
            if src:
                lecture_lines.append(f"๊ฐ•์˜{idx} : {src}")
                url_list.append(src)
                logging.debug("์ถ”์ถœ๋œ ๋™์˜์ƒ ๊ฐ•์˜ URL: %s", src)
        # No lectures -> placeholder message instead of an empty text
        lectures_str = "\n".join(lecture_lines) if url_list else "๊ฐ•์˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค"
        sections_dict[section_label] = (lectures_str, url_list)
    logging.debug("HTML ์ฒ˜๋ฆฌ ์™„๋ฃŒ (์„น์…˜๋ณ„ ๋ถ„๋ฆฌ)")
    section_titles = list(sections_dict.keys())
    default_choice = section_titles[0] if section_titles else None
    # gr.update() instead of the removed gr.Dropdown.update()
    return subject_name, gr.update(choices=section_titles, value=default_choice), sections_dict
def update_lecture_and_urls(selected_section, sections_dict):
    """
    Look up the chosen section and return:
      - the lecture-list text ("๊ฐ•์˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค" when empty/missing),
      - up to three lecture URLs, padded with "" when fewer exist.

    Returns four empty strings when no section is selected or the
    section mapping has not been built yet.
    """
    if not selected_section or not sections_dict:
        return "", "", "", ""
    lectures_text, url_list = sections_dict.get(selected_section, ("", []))
    if not lectures_text:
        lectures_text = "๊ฐ•์˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค"
    # Pad the URL list out to exactly three slots.
    padded = (list(url_list) + ["", "", ""])[:3]
    return (lectures_text, *padded)
def update_lecture_text_only(selected_section, sections_dict):
    """
    Return only the lecture-list text for the chosen section, or
    "๊ฐ•์˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค" when the section has no lectures / is unknown.
    Returns "" when nothing is selected or the mapping is empty.
    """
    if not (selected_section and sections_dict):
        return ""
    text = sections_dict.get(selected_section, ("", []))[0]
    if not text:
        text = "๊ฐ•์˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค"
    logging.debug("update_lecture_text_only - ์„ ํƒ๋œ ์„น์…˜: %s, ๊ฐ•์˜๋ชฉ๋ก: %s", selected_section, text)
    return text
# ---------- [๋ชจ๋“ˆ1: ๊ธฐ์กด ๊ธฐ๋ณธ์ฝ”๋“œ] ๋ ----------
# ---------- [๋ชจ๋“ˆ2: ์ถ”๊ฐ€์ฝ”๋“œ] ์‹œ์ž‘ ----------
def fetch_page_source(url):
    """
    Download the lecture page at *url* and return its HTML source.

    On any failure (network error, non-2xx status, timeout) an error
    string prefixed with "์˜ค๋ฅ˜ ๋ฐœ์ƒ: " is returned instead of raising,
    so callers can surface the message directly in the UI.
    """
    try:
        logging.debug(f"๊ฐ•์˜ ํŽ˜์ด์ง€๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ์ค‘: {url}")
        # timeout= prevents the UI from hanging forever on a stalled server
        # (requests has no default timeout)
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        logging.debug("ํŽ˜์ด์ง€ ์†Œ์Šค๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๊ฐ€์ ธ์˜ด")
        return response.text
    except Exception as e:
        # Deliberate best-effort: report the error as a string, never crash the app.
        logging.error(f"ํŽ˜์ด์ง€ ์†Œ์Šค ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}")
        return "์˜ค๋ฅ˜ ๋ฐœ์ƒ: " + str(e)
def create_script_url(lecture_url):
    """
    Fetch the lecture page and build the absolute subtitle-script URL.

    Searches the page source for the player config's "text_tracks" entry,
    extracts its relative "url" value, and prefixes
    'https://player.vimeo.com'. Returns "" when no match is found.
    """
    page_source = fetch_page_source(lecture_url)
    track_hit = re.search(
        r'"text_tracks"\s*:\s*\[\s*\{[^}]*"url"\s*:\s*"([^"]+)"',
        page_source,
    )
    if not track_hit:
        logging.debug("ํŽ˜์ด์ง€ ์†Œ์Šค์—์„œ ์Šคํฌ๋ฆฝํŠธ ํƒœ๊ทธ ์˜ˆ์‹œ ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ•จ")
        return ""
    script_url = "https://player.vimeo.com" + track_hit.group(1)
    logging.debug(f"์Šคํฌ๋ฆฝํŠธ URL ์™„์„ฑ: {script_url}")
    return script_url
def fetch_script(script_url):
    """
    Download the subtitle script at *script_url* and return its text.

    On failure an error string prefixed with "์˜ค๋ฅ˜ ๋ฐœ์ƒ: " is returned
    instead of raising (process_full keys off that prefix).
    """
    try:
        logging.debug(f"์Šคํฌ๋ฆฝํŠธ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ์ค‘: {script_url}")
        # timeout= keeps a stalled server from hanging the whole request
        # (requests has no default timeout)
        response = requests.get(script_url, timeout=30)
        response.raise_for_status()
        logging.debug("์Šคํฌ๋ฆฝํŠธ๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๊ฐ€์ ธ์˜ด")
        return response.text
    except Exception as e:
        # Deliberate best-effort: report the error as a string, never crash the app.
        logging.error(f"์Šคํฌ๋ฆฝํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์˜ค๋ฅ˜: {e}")
        return "์˜ค๋ฅ˜ ๋ฐœ์ƒ: " + str(e)
def remove_timeline(script_text, lecture_number):
    """
    Strip WebVTT boilerplate from a subtitle script, leaving only the text.

    Rules (unchanged from the original spec):
      1. Drop cue numbers and timestamp ("timeline") lines.
      2. Join all remaining lines with no gaps between them.
      3. A period must be followed by exactly one space; insert one
         wherever content continues immediately after a period.
      4. Remove the leading "WEBVTT" header.
      5. Never shorten, summarize, or alter the actual content.

    Args:
        script_text: raw WebVTT subtitle text.
        lecture_number: kept for interface compatibility; unused.

    Returns:
        The cleaned, single-string lecture transcript.
    """
    valid_lines = []
    for line in script_text.splitlines():
        stripped_line = line.strip()
        if stripped_line == "":
            continue
        # Cue sequence numbers ("1", "2", ...)
        if re.match(r'^\d+$', stripped_line):
            continue
        # Timestamp lines. WebVTT allows both MM:SS.mmm and HH:MM:SS.mmm
        # forms, and optional cue settings after the end time. The previous
        # pattern only allowed milliseconds after an hours component, so
        # MM:SS.mmm cue lines leaked into the output.
        if re.match(
            r'^\d{1,2}:\d{2}(?::\d{2})?(?:\.\d{1,3})?\s*-->\s*'
            r'\d{1,2}:\d{2}(?::\d{2})?(?:\.\d{1,3})?(?:\s.*)?$',
            stripped_line,
        ):
            continue
        valid_lines.append(stripped_line)
    # Rule 2: join with no gaps.
    cleaned_text = "".join(valid_lines)
    # Rule 3: ensure a space after every period.
    cleaned_text = re.sub(r'\.(\S)', r'. \1', cleaned_text)
    # Rule 4: drop the leading WEBVTT header.
    cleaned_text = re.sub(r'^WEBVTT\s*', '', cleaned_text)
    return cleaned_text
# Full pipeline: lecture URL -> script URL -> fetch script -> strip timeline
def process_full(lecture_url):
    """
    Run the whole extraction pipeline for one lecture URL and return the
    final cleaned transcript. Intermediate artifacts are not exposed;
    failures are reported as human-readable strings.
    """
    script_url = create_script_url(lecture_url)
    if not script_url:
        return "์Šคํฌ๋ฆฝํŠธ URL ์ƒ์„ฑ ์‹คํŒจ"
    script_text = fetch_script(script_url)
    # fetch_script signals failure with an "์˜ค๋ฅ˜ ๋ฐœ์ƒ"-prefixed string.
    if "์˜ค๋ฅ˜ ๋ฐœ์ƒ" in script_text:
        return script_text
    return remove_timeline(script_text, 1)
# ---------- [๋ชจ๋“ˆ2: ์ถ”๊ฐ€์ฝ”๋“œ] ๋ ----------
# ---------- [Module 3: lecture-content merge add-on] start ----------
# Standalone Blocks UI holding the merged-transcript textbox plus a
# clipboard-copy helper; it is rendered into the main app further below.
with gr.Blocks() as merge_demo:
    gr.Markdown("## ๊ฐ•์˜ ๋‚ด์šฉ ํ•ฉ์น˜๊ธฐ")
    # The separate "merge lectures" button was removed (per request);
    # merged_content is filled directly by handle_fetch_all.
    merged_content = gr.Textbox(label="์ „์ฒด ๊ฐ•์˜ ๋‚ด์šฉ", lines=10, elem_id="merged_content")
    with gr.Row():
        merge_copy_btn = gr.Button("์ „์ฒด ๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ", elem_id="merge_copy_btn")
        merge_copy_result = gr.Textbox(label="์ „์ฒด ๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌ ๊ฒฐ๊ณผ", interactive=False, elem_id="merge_copy_result")

    def merge_contents(l1, l2, l3):
        """Concatenate up to three non-empty transcripts under [๊ฐ•์˜N] labels."""
        # NOTE(review): not wired to any event here — kept for compatibility;
        # actual merging goes through merge_contents_global / handle_fetch_all.
        merged = ""
        if l1.strip():
            merged += "[๊ฐ•์˜1]\n" + l1.strip()
        if l2.strip():
            if merged:
                merged += "\n\n"
            merged += "[๊ฐ•์˜2]\n" + l2.strip()
        if l3.strip():
            if merged:
                merged += "\n\n"
            merged += "[๊ฐ•์˜3]\n" + l3.strip()
        return merged

    # Client-side clipboard helper; injected verbatim into the page.
    merge_copy_script = """
<script>
function setupMergeCopy(copyBtnId, textBoxId, resultBoxId) {
const copyBtn = document.getElementById(copyBtnId);
if (!copyBtn) {
console.error("๋ฒ„ํŠผ " + copyBtnId + "๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.");
return;
}
copyBtn.addEventListener("click", function(){
const textBoxElem = document.getElementById(textBoxId);
const resultBoxElem = document.getElementById(resultBoxId);
if(textBoxElem && resultBoxElem) {
const textarea = textBoxElem.querySelector("textarea");
const resultTextarea = resultBoxElem.querySelector("textarea");
if(textarea && resultTextarea) {
var text = textarea.value;
if(text.trim() === ""){
resultTextarea.value = "๋ณต์‚ฌํ•  ๋‚ด์šฉ์ด ์—†์Šต๋‹ˆ๋‹ค.";
} else {
navigator.clipboard.writeText(text).then(function(){
resultTextarea.value = "๋ณต์‚ฌ์™„๋ฃŒ";
}, function(err){
resultTextarea.value = "๋ณต์‚ฌ ์‹คํŒจ";
});
}
}
}
});
}
document.addEventListener("DOMContentLoaded", function(){
setupMergeCopy("merge_copy_btn", "merged_content", "merge_copy_result");
});
</script>
"""
    gr.HTML(merge_copy_script)
# ---------- [๋ชจ๋“ˆ3: ๊ฐ•์˜ ๋‚ด์šฉ ํ•ฉ์น˜๊ธฐ ์ถ”๊ฐ€์ฝ”๋“œ] ๋ ----------
# ---------- ์ƒˆ๋กœ์šด ๊ธฐ๋Šฅ: '์„ ํƒํ•œ ์„น์…˜ ๊ฐ•์˜ ๋ชฉ๋ก'์—์„œ ๊ฐ•์˜ URL ์ถ”์ถœ ๋ฐ ๊ฐ•์˜ ๋‚ด์šฉ ์ž๋™ ์—…๋ฐ์ดํŠธ ๊ธฐ๋Šฅ ์ถ”๊ฐ€ ----------
def merge_contents_global(l1, l2, l3):
    """
    Concatenate up to three lecture transcripts into one labeled text.

    Each non-empty (after stripping) transcript is prefixed with its
    [๊ฐ•์˜N] label; parts are separated by a blank line. Returns "" when
    all three inputs are empty.
    """
    labeled_parts = []
    for tag, body in (("[๊ฐ•์˜1]", l1), ("[๊ฐ•์˜2]", l2), ("[๊ฐ•์˜3]", l3)):
        trimmed = body.strip()
        if trimmed:
            labeled_parts.append(tag + "\n" + trimmed)
    return "\n\n".join(labeled_parts)
def handle_fetch_all(lecture_list_text):
    """
    Parse '๊ฐ•์˜N : <url>' lines out of the selected section's lecture
    list, fetch and clean up to three transcripts, and return seven
    values: the three URLs, the three transcripts, and the merged text.
    Missing slots are returned as empty strings.
    """
    logging.debug("์ƒˆ๋กœ์šด '๊ฐ•์˜ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ' ๋ฒ„ํŠผ ํด๋ฆญ๋จ. ๊ฐ•์˜ ๋ชฉ๋ก ํ…์ŠคํŠธ: %s", lecture_list_text)
    urls = []
    for raw_line in lecture_list_text.splitlines():
        hit = re.match(r"๊ฐ•์˜\d+\s*:\s*(.+)", raw_line.strip())
        if hit:
            found_url = hit.group(1).strip()
            urls.append(found_url)
            logging.debug("์ถ”์ถœ๋œ URL: %s", found_url)
    # Keep at most three URLs, padding with empty slots.
    urls = (urls + ["", "", ""])[:3]
    lec_contents = []
    for idx, url in enumerate(urls):
        if url:
            content = process_full(url)
            logging.debug("๊ฐ•์˜ %d ๋‚ด์šฉ: %s", idx+1, content)
        else:
            content = ""
            logging.debug("๊ฐ•์˜ %d URL์ด ๋น„์–ด ์žˆ์Œ.", idx+1)
        lec_contents.append(content)
    merged = merge_contents_global(lec_contents[0], lec_contents[1], lec_contents[2])
    logging.debug("์ „์ฒด ๊ฐ•์˜ ๋‚ด์šฉ ๋ณ‘ํ•ฉ ์™„๋ฃŒ.")
    return (*urls, *lec_contents, merged)
# ---------- ์ƒˆ๋กœ์šด ๊ธฐ๋Šฅ ๋ ----------
# ---------- ํ†ตํ•ฉ Gradio ์•ฑ ๊ตฌ์„ฑ (ํ•œ ํŽ˜์ด์ง€์— ๋ชจ๋‘ ํ‘œ์‹œ) ----------
# Blocks UI for per-lecture transcripts: three URL boxes, three transcript
# boxes, and per-box clipboard-copy buttons with result indicators.
with gr.Blocks() as additional_demo:
    gr.Markdown("## ๊ฐ•์˜ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ")
    with gr.Row():
        url1 = gr.Textbox(label="๊ฐ•์˜1 URL", elem_id="url1")
        url2 = gr.Textbox(label="๊ฐ•์˜2 URL", elem_id="url2")
        url3 = gr.Textbox(label="๊ฐ•์˜3 URL", elem_id="url3")
    with gr.Row():
        # placeholder row kept to preserve the original layout spacing
        pass
    with gr.Row():
        lecture_content1 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ", lines=10, elem_id="lecture_content1")
        lecture_content2 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ", lines=10, elem_id="lecture_content2")
        lecture_content3 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ", lines=10, elem_id="lecture_content3")
    with gr.Row():
        copy_btn1 = gr.Button("๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ", elem_id="copy_btn1")
        copy_btn2 = gr.Button("๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ", elem_id="copy_btn2")
        copy_btn3 = gr.Button("๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ", elem_id="copy_btn3")
    with gr.Row():
        copy_result1 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result1")
        copy_result2 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result2")
        copy_result3 = gr.Textbox(label="๊ฐ•์˜ ๋‚ด์šฉ ๋ณต์‚ฌํ•˜๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result3")
    # Client-side clipboard helper for the three transcript boxes; injected verbatim.
    custom_script = """
<script>
function setupCopy(copyBtnId, textBoxId, resultBoxId) {
const copyBtn = document.getElementById(copyBtnId);
if (!copyBtn) {
console.error("๋ฒ„ํŠผ " + copyBtnId + "๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.");
return;
}
copyBtn.addEventListener("click", function(){
const textBoxElem = document.getElementById(textBoxId);
const resultBoxElem = document.getElementById(resultBoxId);
if(textBoxElem && resultBoxElem) {
const textarea = textBoxElem.querySelector("textarea");
const resultTextarea = resultBoxElem.querySelector("textarea");
if(textarea && resultTextarea) {
var text = textarea.value;
if(text.trim() === ""){
resultTextarea.value = "๋ณต์‚ฌํ•  ๋‚ด์šฉ์ด ์—†์Šต๋‹ˆ๋‹ค.";
} else {
navigator.clipboard.writeText(text).then(function(){
resultTextarea.value = "๋ณต์‚ฌ์™„๋ฃŒ";
}, function(err){
resultTextarea.value = "๋ณต์‚ฌ ์‹คํŒจ";
});
}
}
}
});
}
document.addEventListener("DOMContentLoaded", function(){
setupCopy("copy_btn1", "lecture_content1", "copy_result1");
setupCopy("copy_btn2", "lecture_content2", "copy_result2");
setupCopy("copy_btn3", "lecture_content3", "copy_result3");
});
</script>
"""
    gr.HTML(custom_script)
# Top-level app: title, usage instructions, the parsing tab, and the two
# sub-demos rendered inline; the click wiring below connects them together.
with gr.Blocks() as app:
    gr.Markdown("# ์บ๋กค๋ผ์ธ๋Œ€ํ•™ ๊ฐ•์˜ ์ถ”์ถœ๊ธฐ Ver.2.2")
    # Static usage instructions (HTML injected verbatim).
    gr.HTML(
        """
<div style="background-color: #f0f0f0; padding: 10px; margin-bottom: 20px;">
<strong>์‚ฌ์šฉ๋ฐฉ๋ฒ•</strong>
<ol>
<li>์ถ”์ถœ์„ ์›ํ•˜๋Š” ๊ฐ•์˜ ํŽ˜์ด์ง€์—์„œ "Ctrl + U"๋ฅผ ๋ˆŒ๋Ÿฌ "ํŽ˜์ด์ง€ ์†Œ์Šค ๋ณด๊ธฐ" ํŽ˜์ด์ง€๋ฅผ ์—ฝ๋‹ˆ๋‹ค.</li>
<li>ํŽ˜์ด์ง€ ์†Œ์Šค ๋ณด๊ธฐ ํŽ˜์ด์ง€์˜ ์†Œ์Šค ๋‚ด์šฉ์„ ์ „์ฒด ๋ณต์‚ฌ ํ•ฉ๋‹ˆ๋‹ค. ("Ctrl+A" โ†’ "Ctrl+C")</li>
<li>๋ณต์‚ฌํ•œ ๋‚ด์šฉ์„ ์ถ”์ถœ๊ธฐ์˜ "์ „์ฒด ํŽ˜์ด์ง€ HTML ์ž…๋ ฅ"๋ž€์— ๋ถ™์—ฌ ๋„ฃ๊ณ  "Submit" ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ฉ๋‹ˆ๋‹ค.</li>
<li>์˜ค๋ฅธ์ชฝ ์ฐฝ์—์„œ ์›ํ•˜๋Š” ์„น์…˜์„ ์„ ํƒ ํ›„ "๊ฐ•์˜ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ" ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ฉ๋‹ˆ๋‹ค.</li>
<li>๊ฐ€์ ธ์˜จ ๊ฐ•์˜ ๋‚ด์šฉ ์ค‘์—์„œ ํ•„์š”ํ•œ ๋‚ด์šฉ๋งŒ ๋ณต์‚ฌํ•˜์—ฌ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.</li>
</ol>
</div>
"""
    )
    with gr.Tab("HTML ํŒŒ์‹ฑ ๋ฐ ์„น์…˜ ์„ ํƒ"):
        with gr.Row():
            with gr.Column():
                html_input = gr.Textbox(label="์ „์ฒด ํŽ˜์ด์ง€ HTML ์ž…๋ ฅ", lines=20, placeholder="HTML ์ฝ”๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...")
                parse_btn = gr.Button("Submit")
            with gr.Column():
                subject_out = gr.Textbox(label="๊ณผ๋ชฉ๋ช…", interactive=False)
                section_dropdown = gr.Dropdown(label="์„น์…˜ ์„ ํƒ", choices=[], interactive=True)
                lecture_out = gr.Textbox(label="์„ ํƒํ•œ ์„น์…˜ ๊ฐ•์˜ ๋ชฉ๋ก", lines=10, interactive=False)
        with gr.Row():
            fetch_all_btn = gr.Button("๊ฐ•์˜ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ", elem_id="fetch_all_btn")
    # Holds the {section title: (lecture text, url list)} mapping between events.
    sections_state = gr.State()
    # Parse the pasted HTML into title / dropdown choices / section mapping.
    parse_btn.click(
        fn=process_html_sections,
        inputs=html_input,
        outputs=[subject_out, section_dropdown, sections_state]
    )
    # Selecting a section shows only that section's lecture list.
    section_dropdown.change(
        fn=update_lecture_text_only,
        inputs=[section_dropdown, sections_state],
        outputs=lecture_out
    )
    # Fetch transcripts for the listed URLs and fill the components that were
    # created in additional_demo / merge_demo (rendered below).
    fetch_all_btn.click(
        fn=handle_fetch_all,
        inputs=lecture_out,
        outputs=[url1, url2, url3, lecture_content1, lecture_content2, lecture_content3, merged_content]
    )
    additional_demo.render()
    merge_demo.render()
# Launch the combined Gradio app when run as a script.
if __name__ == "__main__":
    logging.debug("ํ†ตํ•ฉ Gradio ์•ฑ ์‹คํ–‰ ์ค‘")
    app.launch(debug=True)