Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,17 +60,22 @@ def remove_timeline(script_text, lecture_number):
|
|
| 60 |
stripped_line = line.strip()
|
| 61 |
if stripped_line == "":
|
| 62 |
continue
|
|
|
|
| 63 |
if re.match(r'^\d+$', stripped_line):
|
| 64 |
continue
|
|
|
|
| 65 |
if re.match(r'^\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?\s*-->\s*\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?$', stripped_line):
|
| 66 |
continue
|
| 67 |
valid_lines.append(stripped_line)
|
| 68 |
cleaned_text = "".join(valid_lines)
|
|
|
|
| 69 |
cleaned_text = re.sub(r'\.(\S)', r'. \1', cleaned_text)
|
|
|
|
| 70 |
cleaned_text = re.sub(r'^WEBVTT\s*', '', cleaned_text)
|
| 71 |
return cleaned_text
|
| 72 |
|
| 73 |
def get_script_urls(url1, url2, url3):
|
|
|
|
| 74 |
urls = []
|
| 75 |
for url in [url1, url2, url3]:
|
| 76 |
if url.strip() == "":
|
|
@@ -80,6 +85,7 @@ def get_script_urls(url1, url2, url3):
|
|
| 80 |
return urls[0], urls[1], urls[2]
|
| 81 |
|
| 82 |
def get_scripts(script_url1, script_url2, script_url3):
|
|
|
|
| 83 |
scripts = []
|
| 84 |
for url in [script_url1, script_url2, script_url3]:
|
| 85 |
if url.strip() == "":
|
|
@@ -89,6 +95,7 @@ def get_scripts(script_url1, script_url2, script_url3):
|
|
| 89 |
return scripts[0], scripts[1], scripts[2]
|
| 90 |
|
| 91 |
def remove_all_timelines(script1, script2, script3):
|
|
|
|
| 92 |
cleaned = []
|
| 93 |
for i, script in enumerate([script1, script2, script3], start=1):
|
| 94 |
if script.strip() == "":
|
|
@@ -97,42 +104,67 @@ def remove_all_timelines(script1, script2, script3):
|
|
| 97 |
cleaned.append(remove_timeline(script, i))
|
| 98 |
return cleaned[0], cleaned[1], cleaned[2]
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
gr.Markdown("## ๊ฐ์ ์คํฌ๋ฆฝํธ ์ฒ๋ฆฌ ์ธํฐํ์ด์ค")
|
| 107 |
gr.Markdown("โป ๊ฐ ๊ฐ์ URL์ ์
๋ ฅ ํ, '์คํฌ๋ฆฝํธ URL ๋ง๋ค๊ธฐ' ๋ฒํผ์ ํด๋ฆญํ์ฌ ์คํฌ๋ฆฝํธ URL์ ์์ฑํ๊ณ , ์ด์ด์ ์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ ๋ฐ ํ์๋ผ์ธ ์ ๊ฑฐ ๊ธฐ๋ฅ์ ์ฌ์ฉํ์ธ์.")
|
| 108 |
|
|
|
|
| 109 |
with gr.Row():
|
| 110 |
url1 = gr.Textbox(label="๊ฐ์1 URL")
|
| 111 |
url2 = gr.Textbox(label="๊ฐ์2 URL")
|
| 112 |
url3 = gr.Textbox(label="๊ฐ์3 URL")
|
| 113 |
|
|
|
|
| 114 |
generate_script_url_button = gr.Button("์คํฌ๋ฆฝํธ URL ๋ง๋ค๊ธฐ")
|
| 115 |
|
|
|
|
| 116 |
with gr.Row():
|
| 117 |
script_url1 = gr.Textbox(label="๊ฐ์1 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 118 |
script_url2 = gr.Textbox(label="๊ฐ์2 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 119 |
script_url3 = gr.Textbox(label="๊ฐ์3 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 120 |
|
|
|
|
| 121 |
fetch_script_button = gr.Button("์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ")
|
| 122 |
|
|
|
|
| 123 |
with gr.Row():
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
script3 = gr.Textbox(label="๊ฐ์3 ์คํฌ๋ฆฝํธ", lines=10, interactive=True, elem_classes="scrollbox")
|
| 128 |
|
|
|
|
| 129 |
remove_timeline_button = gr.Button("ํ์๋ผ์ธ ์ ๊ฑฐ")
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
with gr.Row():
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
|
|
|
|
| 136 |
generate_script_url_button.click(
|
| 137 |
fn=get_script_urls,
|
| 138 |
inputs=[url1, url2, url3],
|
|
@@ -148,6 +180,9 @@ with gr.Blocks(css="""
|
|
| 148 |
inputs=[script1, script2, script3],
|
| 149 |
outputs=[cleaned1, cleaned2, cleaned3]
|
| 150 |
)
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
gr.Markdown("๋๋ฒ๊น
๋ชจ๋ ํ์ฑํ๋จ: ๋ก๊ทธ๊ฐ ์ฝ์์ ์ถ๋ ฅ๋ฉ๋๋ค.")
|
| 153 |
|
|
|
|
| 60 |
stripped_line = line.strip()
|
| 61 |
if stripped_line == "":
|
| 62 |
continue
|
| 63 |
+
# ๋ฒํธ๋ง ์๋ ์ค ์ ๊ฑฐ
|
| 64 |
if re.match(r'^\d+$', stripped_line):
|
| 65 |
continue
|
| 66 |
+
# ํ์๋ผ์ธ ํ์ ์ ๊ฑฐ (์: 00:00:00.000 --> 00:00:02.000)
|
| 67 |
if re.match(r'^\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?\s*-->\s*\d{1,2}:\d{2}(?::\d{2}(?:\.\d{3})?)?$', stripped_line):
|
| 68 |
continue
|
| 69 |
valid_lines.append(stripped_line)
|
| 70 |
cleaned_text = "".join(valid_lines)
|
| 71 |
+
# ๋ง์นจํ(.) ๋ค์์ ์ฌ๋ฐฑ์ด ์์ผ๋ฉด ์ฌ๋ฐฑ ์ถ๊ฐ
|
| 72 |
cleaned_text = re.sub(r'\.(\S)', r'. \1', cleaned_text)
|
| 73 |
+
# ๊ธ ๊ฐ์ฅ ์์ ์๋ "WEBVTT" ์ ๊ฑฐ
|
| 74 |
cleaned_text = re.sub(r'^WEBVTT\s*', '', cleaned_text)
|
| 75 |
return cleaned_text
|
| 76 |
|
| 77 |
def get_script_urls(url1, url2, url3):
|
| 78 |
+
"""์
๋ ฅ๋ ๊ฐ์ URL ๊ฐ๊ฐ์ ๋ํด ์คํฌ๋ฆฝํธ URL์ ์์ฑํจ (๋น ์นธ์ ๋ฌด์)."""
|
| 79 |
urls = []
|
| 80 |
for url in [url1, url2, url3]:
|
| 81 |
if url.strip() == "":
|
|
|
|
| 85 |
return urls[0], urls[1], urls[2]
|
| 86 |
|
| 87 |
def get_scripts(script_url1, script_url2, script_url3):
|
| 88 |
+
"""์
๋ ฅ๋ ์คํฌ๋ฆฝํธ URL ๊ฐ๊ฐ์ ๋ํด ์คํฌ๋ฆฝํธ๋ฅผ ๊ฐ์ ธ์ด (๋น ์นธ์ ๋ฌด์)."""
|
| 89 |
scripts = []
|
| 90 |
for url in [script_url1, script_url2, script_url3]:
|
| 91 |
if url.strip() == "":
|
|
|
|
| 95 |
return scripts[0], scripts[1], scripts[2]
|
| 96 |
|
| 97 |
def remove_all_timelines(script1, script2, script3):
|
| 98 |
+
"""๊ฐ์ ธ์จ ์คํฌ๋ฆฝํธ ๊ฐ๊ฐ์ ๋ํด ํ์๋ผ์ธ ์ ๊ฑฐ ๊ธฐ๋ฅ ์ํ (๋น ์นธ์ ๋ฌด์)."""
|
| 99 |
cleaned = []
|
| 100 |
for i, script in enumerate([script1, script2, script3], start=1):
|
| 101 |
if script.strip() == "":
|
|
|
|
| 104 |
cleaned.append(remove_timeline(script, i))
|
| 105 |
return cleaned[0], cleaned[1], cleaned[2]
|
| 106 |
|
| 107 |
+
def copy_content(text):
|
| 108 |
+
"""
|
| 109 |
+
๋ฒํผ ํด๋ฆญ ์ ํ
์คํธ๋ฅผ ๋ฉ๋ชจ๋ฆฌ์ ๋ณต์ฌํ๋ ๋์์ ๋ชจ๋ฐฉํ๊ณ ,
|
| 110 |
+
๋ณต์ฌ ํ ๊ฒฐ๊ณผ ๋ฉ์์ง๋ฅผ ๋ฐํํจ.
|
| 111 |
+
"""
|
| 112 |
+
if text.strip() == "":
|
| 113 |
+
return "๋ณต์ฌํ ๋ด์ฉ์ด ์์ต๋๋ค."
|
| 114 |
+
# ์ฌ๊ธฐ์ ์ค์ ๋ก ๋ณต์ฌ ๊ธฐ๋ฅ์ ๊ตฌํํ๋ ค๋ฉด ํด๋ผ์ด์ธํธ ์ฌ์ด๋ ์๋ฐ์คํฌ๋ฆฝํธ๊ฐ ํ์ํ์ง๋ง,
|
| 115 |
+
# ์๋ฒ์ธก์์๋ ๋จ์ํ ๋ฉ์์ง๋ฅผ ๋ฐํํ๋ ๊ฒ์ผ๋ก ์ฒ๋ฆฌํฉ๋๋ค.
|
| 116 |
+
return "๋ณต์ฌ์๋ฃ"
|
| 117 |
+
|
| 118 |
+
with gr.Blocks() as demo:
|
| 119 |
gr.Markdown("## ๊ฐ์ ์คํฌ๋ฆฝํธ ์ฒ๋ฆฌ ์ธํฐํ์ด์ค")
|
| 120 |
gr.Markdown("โป ๊ฐ ๊ฐ์ URL์ ์
๋ ฅ ํ, '์คํฌ๋ฆฝํธ URL ๋ง๋ค๊ธฐ' ๋ฒํผ์ ํด๋ฆญํ์ฌ ์คํฌ๋ฆฝํธ URL์ ์์ฑํ๊ณ , ์ด์ด์ ์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ ๋ฐ ํ์๋ผ์ธ ์ ๊ฑฐ ๊ธฐ๋ฅ์ ์ฌ์ฉํ์ธ์.")
|
| 121 |
|
| 122 |
+
# ์
๋ ฅ์ฐฝ 3๊ฐ: ๊ฐ์1 URL, ๊ฐ์2 URL, ๊ฐ์3 URL (๊ฐ๋ก๋ก ๋์ด)
|
| 123 |
with gr.Row():
|
| 124 |
url1 = gr.Textbox(label="๊ฐ์1 URL")
|
| 125 |
url2 = gr.Textbox(label="๊ฐ์2 URL")
|
| 126 |
url3 = gr.Textbox(label="๊ฐ์3 URL")
|
| 127 |
|
| 128 |
+
# ๋ฒํผ 1๊ฐ: ์คํฌ๋ฆฝํธ URL ๋ง๋ค๊ธฐ
|
| 129 |
generate_script_url_button = gr.Button("์คํฌ๋ฆฝํธ URL ๋ง๋ค๊ธฐ")
|
| 130 |
|
| 131 |
+
# ์ถ๋ ฅ์ฐฝ 3๊ฐ: ๊ฐ์1 ์คํฌ๋ฆฝํธ URL, ๊ฐ์2 ์คํฌ๋ฆฝํธ URL, ๊ฐ์3 ์คํฌ๋ฆฝํธ URL (๊ฐ๋ก๋ก ๋์ด)
|
| 132 |
with gr.Row():
|
| 133 |
script_url1 = gr.Textbox(label="๊ฐ์1 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 134 |
script_url2 = gr.Textbox(label="๊ฐ์2 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 135 |
script_url3 = gr.Textbox(label="๊ฐ์3 ์คํฌ๋ฆฝํธ URL", interactive=False)
|
| 136 |
|
| 137 |
+
# ๋ฒํผ 1๊ฐ: ์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ
|
| 138 |
fetch_script_button = gr.Button("์คํฌ๋ฆฝํธ ๊ฐ์ ธ์ค๊ธฐ")
|
| 139 |
|
| 140 |
+
# ์ถ๋ ฅ์ฐฝ 3๊ฐ: ๊ฐ์1 ์คํฌ๋ฆฝํธ, ๊ฐ์2 ์คํฌ๋ฆฝํธ, ๊ฐ์3 ์คํฌ๋ฆฝํธ (๊ฐ๋ก๋ก ๋์ด)
|
| 141 |
with gr.Row():
|
| 142 |
+
script1 = gr.Textbox(label="๊ฐ์1 ์คํฌ๋ฆฝํธ", lines=10)
|
| 143 |
+
script2 = gr.Textbox(label="๊ฐ์2 ์คํฌ๋ฆฝํธ", lines=10)
|
| 144 |
+
script3 = gr.Textbox(label="๊ฐ์3 ์คํฌ๋ฆฝํธ", lines=10)
|
|
|
|
| 145 |
|
| 146 |
+
# ๋ฒํผ 1๊ฐ: ํ์๋ผ์ธ ์ ๊ฑฐ
|
| 147 |
remove_timeline_button = gr.Button("ํ์๋ผ์ธ ์ ๊ฑฐ")
|
| 148 |
|
| 149 |
+
# ์ถ๋ ฅ์ฐฝ 3๊ฐ: ๊ฐ์1 ํ์๋ผ์ธ ์ ๊ฑฐ, ๊ฐ์2 ํ์๋ผ์ธ ์ ๊ฑฐ, ๊ฐ์3 ํ์๋ผ์ธ ์ ๊ฑฐ (๊ฐ๋ก๋ก ๋์ด)
|
| 150 |
+
with gr.Row():
|
| 151 |
+
cleaned1 = gr.Textbox(label="๊ฐ์1 ํ์๋ผ์ธ ์ ๊ฑฐ", lines=10)
|
| 152 |
+
cleaned2 = gr.Textbox(label="๊ฐ์2 ํ์๋ผ์ธ ์ ๊ฑฐ", lines=10)
|
| 153 |
+
cleaned3 = gr.Textbox(label="๊ฐ์3 ํ์๋ผ์ธ ์ ๊ฑฐ", lines=10)
|
| 154 |
+
|
| 155 |
+
# "๋ด์ฉ ๋ณต์ฌํ๊ธฐ" ๋ฒํผ 3๊ฐ (๊ฐ ๊ฐ์๋ณ)
|
| 156 |
+
with gr.Row():
|
| 157 |
+
copy1 = gr.Button("๋ด์ฉ ๋ณต์ฌํ๊ธฐ")
|
| 158 |
+
copy2 = gr.Button("๋ด์ฉ ๋ณต์ฌํ๊ธฐ")
|
| 159 |
+
copy3 = gr.Button("๋ด์ฉ ๋ณต์ฌํ๊ธฐ")
|
| 160 |
+
|
| 161 |
+
# ๋ณต์ฌ ๊ฒฐ๊ณผ๋ฅผ ํ์ํ ์ถ๋ ฅ์ฐฝ 3๊ฐ
|
| 162 |
with gr.Row():
|
| 163 |
+
copy_result1 = gr.Textbox(label="๊ฐ์1 ๋ณต์ฌ ๊ฒฐ๊ณผ", interactive=False)
|
| 164 |
+
copy_result2 = gr.Textbox(label="๊ฐ์2 ๋ณต์ฌ ๊ฒฐ๊ณผ", interactive=False)
|
| 165 |
+
copy_result3 = gr.Textbox(label="๊ฐ์3 ๋ณต์ฌ ๊ฒฐ๊ณผ", interactive=False)
|
| 166 |
|
| 167 |
+
# ๊ฐ ๋ฒํผ ํด๋ฆญ ์ด๋ฒคํธ ์ฐ๊ฒฐ
|
| 168 |
generate_script_url_button.click(
|
| 169 |
fn=get_script_urls,
|
| 170 |
inputs=[url1, url2, url3],
|
|
|
|
| 180 |
inputs=[script1, script2, script3],
|
| 181 |
outputs=[cleaned1, cleaned2, cleaned3]
|
| 182 |
)
|
| 183 |
+
copy1.click(fn=copy_content, inputs=cleaned1, outputs=copy_result1)
|
| 184 |
+
copy2.click(fn=copy_content, inputs=cleaned2, outputs=copy_result2)
|
| 185 |
+
copy3.click(fn=copy_content, inputs=cleaned3, outputs=copy_result3)
|
| 186 |
|
| 187 |
gr.Markdown("๋๋ฒ๊น
๋ชจ๋ ํ์ฑํ๋จ: ๋ก๊ทธ๊ฐ ์ฝ์์ ์ถ๋ ฅ๋ฉ๋๋ค.")
|
| 188 |
|