# NOTE: removed a non-code scrape artifact ("Spaces: Sleeping" status header)
# that preceded the source; it was not part of the program.
| import gradio as gr | |
| from bs4 import BeautifulSoup | |
| import re | |
| import logging | |
| import requests | |
# Debug-level logging for the whole integrated app; the format adds timestamp and level.
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
| # ---------- [๋ชจ๋1: ๊ธฐ์กด ๊ธฐ๋ณธ์ฝ๋] ์์ ---------- | |
# Mapping from English month names to their localized month labels.
month_mapping = {
    name: f"{number}์"
    for number, name in enumerate(
        (
            "January", "February", "March", "April",
            "May", "June", "July", "August",
            "September", "October", "November", "December",
        ),
        start=1,
    )
}
def convert_date_range(date_range_str):
    """Translate a date range such as '6 January - 12 January' into the
    localized '<month> <day>' form used by the UI.

    The input is returned unchanged whenever it does not look like
    '<day> <month> - <day> <month>'.
    """
    logging.debug("์๋ณธ ๋ ์ง ๋ฒ์: %s", date_range_str)
    halves = [half.strip() for half in date_range_str.split('-')]
    if len(halves) != 2:
        # Not exactly one '-' separator: leave the text as-is.
        logging.debug("๋ ์ง ๋ฒ์ ํ์์ด ์ฌ๋ฐ๋ฅด์ง ์์: %s", date_range_str)
        return date_range_str
    first_tokens = halves[0].split()
    second_tokens = halves[1].split()
    if len(first_tokens) < 2 or len(second_tokens) < 2:
        # Each side needs at least a day number and a month name.
        logging.debug("๋ ์ง ๊ตฌ์ฑ์์ ๋ถ์กฑ: %s, %s", halves[0], halves[1])
        return date_range_str
    start_day, start_month_en = first_tokens[0], first_tokens[1]
    end_day, end_month_en = second_tokens[0], second_tokens[1]
    # Unknown month names fall back to their English spelling.
    start_month = month_mapping.get(start_month_en, start_month_en)
    end_month = month_mapping.get(end_month_en, end_month_en)
    converted = f"{start_month} {start_day}์ผ ~ {end_month} {end_day}์ผ"
    logging.debug("๋ณํ๋ ๋ ์ง ๋ฒ์: %s", converted)
    return converted
def process_html(html_text):
    """Parse a full course page's HTML and extract the subject name plus a
    plain-text listing of the video lecture URLs found in each section.

    Returns:
        tuple[str, str]: (subject name from <h1>, multi-line text where each
        section heading is followed by its numbered lecture URLs).
    """
    logging.debug("์ ๋ ฅ HTML ์ฒ๋ฆฌ ์์")
    soup = BeautifulSoup(html_text, "html.parser")
    # 1. Subject name taken from the page's <h1> tag (empty string if absent).
    subject_elem = soup.find("h1")
    subject_name = ""
    if subject_elem:
        subject_name = subject_elem.get_text(strip=True)
        logging.debug("์ถ์ถ๋ ๊ณผ๋ชฉ๋ช : %s", subject_name)
    else:
        logging.debug("h1 ํ๊ทธ๋ฅผ ์ฐพ์ง ๋ชปํจ")
    # 2. Per-section lecture listing; sections are <li id="section-N"> items.
    sections_output = ""
    section_elements = soup.find_all("li", id=re.compile(r"^section-\d+"))
    logging.debug("์ฐพ์ ์น์ ๊ฐ์: %d", len(section_elements))
    for section in section_elements:
        section_id = section.get("id")
        logging.debug("์ฒ๋ฆฌ ์ค์ธ ์น์ ID: %s", section_id)
        sec_match = re.search(r"section-(\d+)", section_id)
        if not sec_match:
            continue
        sec_num = int(sec_match.group(1))
        if sec_num == 0:
            # section-0 is the course-overview container, not a week; skip it.
            logging.debug("section-0 ์ ์คํต")
            continue
        # Section label: section-1 is the introduction; section-2 onward map to weeks.
        if sec_num == 1:
            section_label = "Introduction"
        else:
            week_num = sec_num - 1  # section-2 -> week 1, section-3 -> week 2, ...
            section_label = f"{week_num}์ฃผ์ฐจ"
        # Date range from the section header (<a> text inside <h3 class="sectionname">).
        h3_elem = section.find("h3", class_="sectionname")
        date_range_text = ""
        if h3_elem and h3_elem.find("a"):
            header_text = h3_elem.find("a").get_text(strip=True)
            logging.debug("ํค๋ ํ ์คํธ: %s", header_text)
            date_match = re.search(r'(\d+\s+[A-Za-z]+\s*-\s*\d+\s+[A-Za-z]+)', header_text)
            if date_match:
                raw_date_range = date_match.group(1)
                date_range_text = convert_date_range(raw_date_range)
            else:
                logging.debug("๋ ์ง ๋ฒ์ ํจํด ๋งค์นญ ์คํจ: %s", header_text)
        else:
            logging.debug("h3 ๋๋ h3 ๋ด a ํ๊ทธ๋ฅผ ์ฐพ์ง ๋ชปํจ for section: %s", section_id)
        # The introduction never shows a date range; weeks show it when found.
        if sec_num == 1:
            section_heading = f"์น์ : {section_label}"
        else:
            if date_range_text:
                section_heading = f"์น์ : {section_label} ({date_range_text})"
            else:
                section_heading = f"์น์ : {section_label}"
        sections_output += section_heading + "\n"
        # Collect lecture video URLs from <iframe src="..."> inside this section.
        iframes = section.find_all("iframe")
        logging.debug("์น์ %s ๋ด ์ฐพ์ iframe ๊ฐ์: %d", section_id, len(iframes))
        for idx, iframe in enumerate(iframes, start=1):
            video_url = iframe.get("src", "").strip()
            if video_url:
                sections_output += f"๊ฐ์{idx} : {video_url}\n"
                logging.debug("์ถ์ถ๋ ๋์์ ๊ฐ์ URL: %s", video_url)
        sections_output += "\n"
    logging.debug("HTML ์ฒ๋ฆฌ ์๋ฃ")
    return subject_name, sections_output
def process_html_sections(html_text):
    """Parse a full course page's HTML and split it into per-section data.

    Returns a 3-tuple wired directly into Gradio outputs:
      - subject name extracted from <h1>,
      - a gr.update() refreshing the section dropdown (choices + default value),
      - a dict mapping section label -> (lecture listing text, URL list).
    """
    logging.debug("์ ๋ ฅ HTML ์ฒ๋ฆฌ ์์ (์น์ ๋ณ ๋ถ๋ฆฌ)")
    soup = BeautifulSoup(html_text, "html.parser")
    # 1. Subject name from the <h1> tag (empty string when missing).
    subject_elem = soup.find("h1")
    subject_name = ""
    if subject_elem:
        subject_name = subject_elem.get_text(strip=True)
        logging.debug("์ถ์ถ๋ ๊ณผ๋ชฉ๋ช : %s", subject_name)
    else:
        logging.debug("h1 ํ๊ทธ๋ฅผ ์ฐพ์ง ๋ชปํจ")
    sections_dict = {}
    section_elements = soup.find_all("li", id=re.compile(r"^section-\d+"))
    logging.debug("์ฐพ์ ์น์ ๊ฐ์: %d", len(section_elements))
    for section in section_elements:
        section_id = section.get("id")
        logging.debug("์ฒ๋ฆฌ ์ค์ธ ์น์ ID: %s", section_id)
        sec_match = re.search(r"section-(\d+)", section_id)
        if not sec_match:
            continue
        sec_num = int(sec_match.group(1))
        if sec_num == 0:
            # section-0 is the course-overview container; skip it.
            logging.debug("section-0 ์ ์คํต")
            continue
        # Section label: section-1 is the introduction; section-2 onward are weeks.
        if sec_num == 1:
            section_label = "์น์ : Introduction"
        else:
            week_num = sec_num - 1
            section_label = f"์น์ : {week_num}์ฃผ์ฐจ"
        # Date range from the section header (<a> inside <h3 class="sectionname">).
        h3_elem = section.find("h3", class_="sectionname")
        date_range_text = ""
        if h3_elem and h3_elem.find("a"):
            header_text = h3_elem.find("a").get_text(strip=True)
            logging.debug("ํค๋ ํ ์คํธ: %s", header_text)
            date_match = re.search(r'(\d+\s+[A-Za-z]+\s*-\s*\d+\s+[A-Za-z]+)', header_text)
            if date_match:
                raw_date_range = date_match.group(1)
                date_range_text = convert_date_range(raw_date_range)
            else:
                logging.debug("๋ ์ง ๋ฒ์ ํจํด ๋งค์นญ ์คํจ: %s", header_text)
        else:
            logging.debug("h3 ๋๋ h3 ๋ด a ํ๊ทธ๋ฅผ ์ฐพ์ง ๋ชปํจ for section: %s", section_id)
        if sec_num != 1 and date_range_text:
            section_label += f" ({date_range_text})"
        # Lecture listing text plus the raw URL list from <iframe src="...">.
        lectures_str = ""
        url_list = []
        iframes = section.find_all("iframe")
        logging.debug("์น์ %s ๋ด ์ฐพ์ iframe ๊ฐ์: %d", section_id, len(iframes))
        for idx, iframe in enumerate(iframes, start=1):
            video_url = iframe.get("src", "").strip()
            if video_url:
                lectures_str += f"๊ฐ์{idx} : {video_url}\n"
                url_list.append(video_url)
                logging.debug("์ถ์ถ๋ ๋์์ ๊ฐ์ URL: %s", video_url)
        # Sections without any lecture get a placeholder message instead.
        if not url_list:
            lectures_str = "๊ฐ์๊ฐ ์์ต๋๋ค"
        sections_dict[section_label] = (lectures_str.strip(), url_list)
    logging.debug("HTML ์ฒ๋ฆฌ ์๋ฃ (์น์ ๋ณ ๋ถ๋ฆฌ)")
    sections_list = list(sections_dict.keys())
    default_val = sections_list[0] if sections_list else None
    # gr.update() (not the removed gr.Dropdown.update()) works across Gradio versions.
    return subject_name, gr.update(choices=sections_list, value=default_val), sections_dict
def update_lecture_and_urls(selected_section, sections_dict):
    """Return the lecture listing and up to three lecture URLs for a section.

    Returns a 4-tuple:
      - listing text (placeholder message when the section has no lectures),
      - url1, url2, url3 (missing slots are empty strings).
    All four entries are empty strings when nothing is selected yet.
    """
    if not selected_section or not sections_dict:
        return "", "", "", ""
    lectures_text, url_list = sections_dict.get(selected_section, ("", []))
    lectures_text = lectures_text or "๊ฐ์๊ฐ ์์ต๋๋ค"
    # Pad the URL list out to exactly three entries.
    padded = (list(url_list) + ["", "", ""])[:3]
    return lectures_text, padded[0], padded[1], padded[2]
def update_lecture_text_only(selected_section, sections_dict):
    """Return only the lecture-listing text for the chosen section, or a
    placeholder message when the section exists but holds no lectures."""
    if not selected_section or not sections_dict:
        return ""
    entry = sections_dict.get(selected_section, ("", []))
    lectures_text = entry[0] or "๊ฐ์๊ฐ ์์ต๋๋ค"
    logging.debug("update_lecture_text_only - ์ ํ๋ ์น์ : %s, ๊ฐ์๋ชฉ๋ก: %s", selected_section, lectures_text)
    return lectures_text
| # ---------- [๋ชจ๋1: ๊ธฐ์กด ๊ธฐ๋ณธ์ฝ๋] ๋ ---------- | |
| # ---------- [๋ชจ๋2: ์ถ๊ฐ์ฝ๋] ์์ ---------- | |
def fetch_page_source(url):
    """Download and return the raw HTML of a lecture page.

    On any failure (network error, timeout, non-2xx status) a human-readable
    error string is returned instead of raising, matching what callers expect.
    """
    try:
        logging.debug(f"๊ฐ์ ํ์ด์ง๋ฅผ ๊ฐ์ ธ์ค๋ ์ค: {url}")
        # Fix: a missing timeout lets a stalled request hang the UI forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        logging.debug("ํ์ด์ง ์์ค๋ฅผ ์ฑ๊ณต์ ์ผ๋ก ๊ฐ์ ธ์ด")
        return response.text
    except Exception as e:
        logging.error(f"ํ์ด์ง ์์ค ๊ฐ์ ธ์ค๊ธฐ ์ค๋ฅ: {e}")
        return "์ค๋ฅ ๋ฐ์: " + str(e)
def create_script_url(lecture_url):
    """Build the subtitle ("script") URL for a lecture.

    Fetches the lecture page, locates the first "url" entry inside the
    player's "text_tracks" JSON blob, and prefixes it with the Vimeo player
    host. Returns an empty string when no track URL can be found.
    """
    page_source = fetch_page_source(lecture_url)
    track_match = re.search(
        r'"text_tracks"\s*:\s*\[\s*\{[^}]*"url"\s*:\s*"([^"]+)"',
        page_source,
    )
    if not track_match:
        logging.debug("ํ์ด์ง ์์ค์์ ์คํฌ๋ฆฝํธ ํ๊ทธ ์์ ๋ถ๋ถ์ ์ฐพ์ง ๋ชปํจ")
        return ""
    script_url = "https://player.vimeo.com" + track_match.group(1)
    logging.debug(f"์คํฌ๋ฆฝํธ URL ์์ฑ: {script_url}")
    return script_url
def fetch_script(script_url):
    """Download and return the raw subtitle (WebVTT) text.

    On any failure (network error, timeout, non-2xx status) a human-readable
    error string is returned instead of raising; process_full() checks for it.
    """
    try:
        logging.debug(f"์คํฌ๋ฆฝํธ๋ฅผ ๊ฐ์ ธ์ค๋ ์ค: {script_url}")
        # Fix: a missing timeout lets a stalled request hang the UI forever.
        response = requests.get(script_url, timeout=30)
        response.raise_for_status()
        logging.debug("์คํฌ๋ฆฝํธ๋ฅผ ์ฑ๊ณต์ ์ผ๋ก ๊ฐ์ ธ์ด")
        return response.text
    except Exception as e:
        logging.error(f"์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ ์ค๋ฅ: {e}")
        return "์ค๋ฅ ๋ฐ์: " + str(e)
def remove_timeline(script_text, lecture_number):
    """Strip WebVTT scaffolding from subtitle text, keeping only the prose.

    Cleanup rules (from the original spec):
      1. Cue numbers and timestamp lines are dropped.
      2. The remaining lines are concatenated with no gaps between them.
      3. Every period is guaranteed to be followed by a single space.
      4. A leading "WEBVTT" marker is removed.
      5. The caption text itself is never shortened or rephrased.

    `lecture_number` is accepted for interface compatibility; it is unused.
    """
    timestamp_re = re.compile(
        r'^\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?\s*-->\s*\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?$'
    )
    cue_number_re = re.compile(r'^\d+$')
    kept = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line or cue_number_re.match(line) or timestamp_re.match(line):
            continue
        kept.append(line)
    merged = "".join(kept)
    # Ensure a single space after any period that runs straight into text.
    merged = re.sub(r'\.(\S)', r'. \1', merged)
    return re.sub(r'^WEBVTT\s*', '', merged)
| # ์๋ก์ด ์ ์ฒด ์ฒ๋ฆฌ ํจ์: ๊ฐ์ URL -> ์คํฌ๋ฆฝํธ URL ์์ฑ -> ์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ -> ํ์๋ผ์ธ ์ ๊ฑฐ | |
def process_full(lecture_url):
    """End-to-end pipeline for one lecture: URL -> subtitle URL -> raw
    subtitle -> cleaned lecture text.

    Intermediate results are not exposed; only the final cleaned text (or an
    error message string) is returned.
    """
    script_url = create_script_url(lecture_url)
    if not script_url:
        return "์คํฌ๋ฆฝํธ URL ์์ฑ ์คํจ"
    script_text = fetch_script(script_url)
    # fetch_script reports failures as an error string rather than raising.
    if "์ค๋ฅ ๋ฐ์" in script_text:
        return script_text
    return remove_timeline(script_text, 1)
| # ---------- [๋ชจ๋2: ์ถ๊ฐ์ฝ๋] ๋ ---------- | |
| # ---------- [๋ชจ๋3: ๊ฐ์ ๋ด์ฉ ํฉ์น๊ธฐ ์ถ๊ฐ์ฝ๋] ์์ ---------- | |
# Standalone Blocks UI for the merged ("combined") lecture content panel.
# It is rendered inside `app` below; `merged_content` is filled by
# handle_fetch_all() through the outputs wiring in the main app.
with gr.Blocks() as merge_demo:
    gr.Markdown("## ๊ฐ์ ๋ด์ฉ ํฉ์น๊ธฐ")
    # The explicit "merge" button was removed by request; only the merged
    # textbox and its copy-to-clipboard helper remain.
    merged_content = gr.Textbox(label="์ ์ฒด ๊ฐ์ ๋ด์ฉ", lines=10, elem_id="merged_content")
    with gr.Row():
        merge_copy_btn = gr.Button("์ ์ฒด ๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ", elem_id="merge_copy_btn")
        merge_copy_result = gr.Textbox(label="์ ์ฒด ๊ฐ์ ๋ด์ฉ ๋ณต์ฌ ๊ฒฐ๊ณผ", interactive=False, elem_id="merge_copy_result")

    def merge_contents(l1, l2, l3):
        # Concatenate the non-empty lecture texts with labelled headers,
        # separating entries with one blank line.
        # NOTE(review): not wired to any event in this view — kept for reuse.
        merged = ""
        if l1.strip():
            merged += "[๊ฐ์1]\n" + l1.strip()
        if l2.strip():
            if merged:
                merged += "\n\n"
            merged += "[๊ฐ์2]\n" + l2.strip()
        if l3.strip():
            if merged:
                merged += "\n\n"
            merged += "[๊ฐ์3]\n" + l3.strip()
        return merged

    # Clipboard helper injected as raw HTML: copies the merged textarea's
    # value and writes the outcome into the result textbox.
    merge_copy_script = """
    <script>
    function setupMergeCopy(copyBtnId, textBoxId, resultBoxId) {
        const copyBtn = document.getElementById(copyBtnId);
        if (!copyBtn) {
            console.error("๋ฒํผ " + copyBtnId + "๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.");
            return;
        }
        copyBtn.addEventListener("click", function(){
            const textBoxElem = document.getElementById(textBoxId);
            const resultBoxElem = document.getElementById(resultBoxId);
            if(textBoxElem && resultBoxElem) {
                const textarea = textBoxElem.querySelector("textarea");
                const resultTextarea = resultBoxElem.querySelector("textarea");
                if(textarea && resultTextarea) {
                    var text = textarea.value;
                    if(text.trim() === ""){
                        resultTextarea.value = "๋ณต์ฌํ ๋ด์ฉ์ด ์์ต๋๋ค.";
                    } else {
                        navigator.clipboard.writeText(text).then(function(){
                            resultTextarea.value = "๋ณต์ฌ์๋ฃ";
                        }, function(err){
                            resultTextarea.value = "๋ณต์ฌ ์คํจ";
                        });
                    }
                }
            }
        });
    }
    document.addEventListener("DOMContentLoaded", function(){
        setupMergeCopy("merge_copy_btn", "merged_content", "merge_copy_result");
    });
    </script>
    """
    gr.HTML(merge_copy_script)
| # ---------- [๋ชจ๋3: ๊ฐ์ ๋ด์ฉ ํฉ์น๊ธฐ ์ถ๊ฐ์ฝ๋] ๋ ---------- | |
| # ---------- ์๋ก์ด ๊ธฐ๋ฅ: '์ ํํ ์น์ ๊ฐ์ ๋ชฉ๋ก'์์ ๊ฐ์ URL ์ถ์ถ ๋ฐ ๊ฐ์ ๋ด์ฉ ์๋ ์ ๋ฐ์ดํธ ๊ธฐ๋ฅ ์ถ๊ฐ ---------- | |
def merge_contents_global(l1, l2, l3):
    """Join up to three lecture texts into one labelled document.

    Empty (or whitespace-only) entries are skipped; the remaining entries are
    prefixed with a numbered label and separated by one blank line.
    """
    labelled = []
    for number, content in enumerate((l1, l2, l3), start=1):
        text = content.strip()
        if text:
            labelled.append(f"[๊ฐ์{number}]\n{text}")
    return "\n\n".join(labelled)
def handle_fetch_all(lecture_list_text):
    """Parse the selected section's lecture listing, fetch the cleaned text of
    up to three lectures, and return everything the UI bindings need.

    Returns a 7-tuple: (url1, url2, url3, content1, content2, content3,
    merged document). Missing slots are empty strings.
    """
    logging.debug("์๋ก์ด '๊ฐ์ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ' ๋ฒํผ ํด๋ฆญ๋จ. ๊ฐ์ ๋ชฉ๋ก ํ ์คํธ: %s", lecture_list_text)
    urls = []
    for line in lecture_list_text.splitlines():
        m = re.match(r"๊ฐ์\d+\s*:\s*(.+)", line.strip())
        if m:
            url = m.group(1).strip()
            urls.append(url)
            logging.debug("์ถ์ถ๋ URL: %s", url)
    # Keep at most three URLs and pad the remainder with empty strings.
    urls = (urls + ["", "", ""])[:3]
    lec_contents = []
    for idx, url in enumerate(urls):
        if url:
            content = process_full(url)
            logging.debug("๊ฐ์ %d ๋ด์ฉ: %s", idx + 1, content)
        else:
            content = ""
            logging.debug("๊ฐ์ %d URL์ด ๋น์ด ์์.", idx + 1)
        lec_contents.append(content)
    merged = merge_contents_global(lec_contents[0], lec_contents[1], lec_contents[2])
    logging.debug("์ ์ฒด ๊ฐ์ ๋ด์ฉ ๋ณํฉ ์๋ฃ.")
    return urls[0], urls[1], urls[2], lec_contents[0], lec_contents[1], lec_contents[2], merged
| # ---------- ์๋ก์ด ๊ธฐ๋ฅ ๋ ---------- | |
| # ---------- ํตํฉ Gradio ์ฑ ๊ตฌ์ฑ (ํ ํ์ด์ง์ ๋ชจ๋ ํ์) ---------- | |
# Standalone Blocks UI for the three per-lecture panels (URL, content, copy
# button, copy result). Rendered inside `app` below; the textboxes are filled
# by handle_fetch_all() through the outputs wiring in the main app.
with gr.Blocks() as additional_demo:
    gr.Markdown("## ๊ฐ์ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ")
    with gr.Row():
        url1 = gr.Textbox(label="๊ฐ์1 URL", elem_id="url1")
        url2 = gr.Textbox(label="๊ฐ์2 URL", elem_id="url2")
        url3 = gr.Textbox(label="๊ฐ์3 URL", elem_id="url3")
    with gr.Row():
        # Placeholder row kept from the original layout (intentionally empty).
        pass
    with gr.Row():
        lecture_content1 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ", lines=10, elem_id="lecture_content1")
        lecture_content2 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ", lines=10, elem_id="lecture_content2")
        lecture_content3 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ", lines=10, elem_id="lecture_content3")
    with gr.Row():
        copy_btn1 = gr.Button("๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ", elem_id="copy_btn1")
        copy_btn2 = gr.Button("๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ", elem_id="copy_btn2")
        copy_btn3 = gr.Button("๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ", elem_id="copy_btn3")
    with gr.Row():
        copy_result1 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result1")
        copy_result2 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result2")
        copy_result3 = gr.Textbox(label="๊ฐ์ ๋ด์ฉ ๋ณต์ฌํ๊ธฐ ๊ฒฐ๊ณผ", interactive=False, elem_id="copy_result3")
    # Clipboard helper injected as raw HTML: one setup per lecture panel,
    # copying that panel's textarea and reporting into its result textbox.
    custom_script = """
    <script>
    function setupCopy(copyBtnId, textBoxId, resultBoxId) {
        const copyBtn = document.getElementById(copyBtnId);
        if (!copyBtn) {
            console.error("๋ฒํผ " + copyBtnId + "๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.");
            return;
        }
        copyBtn.addEventListener("click", function(){
            const textBoxElem = document.getElementById(textBoxId);
            const resultBoxElem = document.getElementById(resultBoxId);
            if(textBoxElem && resultBoxElem) {
                const textarea = textBoxElem.querySelector("textarea");
                const resultTextarea = resultBoxElem.querySelector("textarea");
                if(textarea && resultTextarea) {
                    var text = textarea.value;
                    if(text.trim() === ""){
                        resultTextarea.value = "๋ณต์ฌํ ๋ด์ฉ์ด ์์ต๋๋ค.";
                    } else {
                        navigator.clipboard.writeText(text).then(function(){
                            resultTextarea.value = "๋ณต์ฌ์๋ฃ";
                        }, function(err){
                            resultTextarea.value = "๋ณต์ฌ ์คํจ";
                        });
                    }
                }
            }
        });
    }
    document.addEventListener("DOMContentLoaded", function(){
        setupCopy("copy_btn1", "lecture_content1", "copy_result1");
        setupCopy("copy_btn2", "lecture_content2", "copy_result2");
        setupCopy("copy_btn3", "lecture_content3", "copy_result3");
    });
    </script>
    """
    gr.HTML(custom_script)
# Main page: usage guide, the HTML-parsing tab, then the two sub-UIs
# (per-lecture panels and the merged-content panel) rendered below it.
with gr.Blocks() as app:
    gr.Markdown("# ์บ๋กค๋ผ์ธ๋ํ ๊ฐ์ ์ถ์ถ๊ธฐ Ver.2.2")
    gr.HTML(
        """
        <div style="background-color: #f0f0f0; padding: 10px; margin-bottom: 20px;">
        <strong>์ฌ์ฉ๋ฐฉ๋ฒ</strong>
        <ol>
        <li>์ถ์ถ์ ์ํ๋ ๊ฐ์ ํ์ด์ง์์ "Ctrl + U"๋ฅผ ๋๋ฌ "ํ์ด์ง ์์ค ๋ณด๊ธฐ" ํ์ด์ง๋ฅผ ์ฝ๋๋ค.</li>
        <li>ํ์ด์ง ์์ค ๋ณด๊ธฐ ํ์ด์ง์ ์์ค ๋ด์ฉ์ ์ ์ฒด ๋ณต์ฌ ํฉ๋๋ค. ("Ctrl+A" โ "Ctrl+C")</li>
        <li>๋ณต์ฌํ ๋ด์ฉ์ ์ถ์ถ๊ธฐ์ "์ ์ฒด ํ์ด์ง HTML ์ ๋ ฅ"๋์ ๋ถ์ฌ ๋ฃ๊ณ "Submit" ๋ฒํผ์ ํด๋ฆญํฉ๋๋ค.</li>
        <li>์ค๋ฅธ์ชฝ ์ฐฝ์์ ์ํ๋ ์น์ ์ ์ ํ ํ "๊ฐ์ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ" ๋ฒํผ์ ํด๋ฆญํฉ๋๋ค.</li>
        <li>๊ฐ์ ธ์จ ๊ฐ์ ๋ด์ฉ ์ค์์ ํ์ํ ๋ด์ฉ๋ง ๋ณต์ฌํ์ฌ ์ฌ์ฉ ๊ฐ๋ฅํฉ๋๋ค.</li>
        </ol>
        </div>
        """
    )
    with gr.Tab("HTML ํ์ฑ ๋ฐ ์น์ ์ ํ"):
        with gr.Row():
            with gr.Column():
                html_input = gr.Textbox(label="์ ์ฒด ํ์ด์ง HTML ์ ๋ ฅ", lines=20, placeholder="HTML ์ฝ๋๋ฅผ ์ ๋ ฅํ์ธ์...")
                parse_btn = gr.Button("Submit")
            with gr.Column():
                subject_out = gr.Textbox(label="๊ณผ๋ชฉ๋ช ", interactive=False)
                section_dropdown = gr.Dropdown(label="์น์ ์ ํ", choices=[], interactive=True)
                lecture_out = gr.Textbox(label="์ ํํ ์น์ ๊ฐ์ ๋ชฉ๋ก", lines=10, interactive=False)
        with gr.Row():
            fetch_all_btn = gr.Button("๊ฐ์ ๋ด์ฉ ๊ฐ์ ธ์ค๊ธฐ", elem_id="fetch_all_btn")
        # Holds the section-label -> (listing, urls) dict between callbacks.
        sections_state = gr.State()
        # Submit: parse the pasted HTML into subject / dropdown / state.
        parse_btn.click(
            fn=process_html_sections,
            inputs=html_input,
            outputs=[subject_out, section_dropdown, sections_state]
        )
        # Selecting a section shows its lecture listing.
        section_dropdown.change(
            fn=update_lecture_text_only,
            inputs=[section_dropdown, sections_state],
            outputs=lecture_out
        )
        # Fetch button: pulls and cleans up to three lecture scripts; outputs
        # target components defined in additional_demo and merge_demo.
        fetch_all_btn.click(
            fn=handle_fetch_all,
            inputs=lecture_out,
            outputs=[url1, url2, url3, lecture_content1, lecture_content2, lecture_content3, merged_content]
        )
    additional_demo.render()
    merge_demo.render()

if __name__ == "__main__":
    logging.debug("ํตํฉ Gradio ์ฑ ์คํ ์ค")
    app.launch(debug=True)