Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -40,12 +40,13 @@ def get_youtube_script(url):
|
|
| 40 |
title = data.get("title", "μ λͺ© μμ")
|
| 41 |
description = data.get("description", "μ€λͺ
μμ")
|
| 42 |
transcription_text = data.get("transcriptionAsText", "")
|
|
|
|
| 43 |
|
| 44 |
if not transcription_text:
|
| 45 |
raise ValueError("μΆμΆλ μ€ν¬λ¦½νΈκ° μμ΅λλ€.")
|
| 46 |
|
| 47 |
logging.info("μ€ν¬λ¦½νΈ μΆμΆ μλ£")
|
| 48 |
-
return title, description, transcription_text
|
| 49 |
except Exception as e:
|
| 50 |
logging.exception("μ€ν¬λ¦½νΈ μΆμΆ μ€ μ€λ₯ λ°μ")
|
| 51 |
raise
|
|
@@ -69,7 +70,8 @@ def summarize_text(title, description, text):
|
|
| 69 |
[μ νλΈ μμ½ κ·μΉ]
|
| 70 |
1. λλ μ νλΈ μμ μ λ¬Έ ν΄μ€κ°λ‘μ μ§μΉ¨μ λ§κ² μ΄ κΈμ μμ±νλΌ
|
| 71 |
2. μλμ μ λͺ©κ³Ό μ€λͺ
μ μ΄ μ νλΈ μμμ μλ³Έ λ©νλ°μ΄ν°μ΄λ€.
|
| 72 |
-
3. λ°λμ μ λͺ©κ³Ό μ€λͺ
μΌλ‘ μ£Όμ μ
|
|
|
|
| 73 |
4. λ°λμ νκΈλ‘ μμ±νλΌ
|
| 74 |
5. λ°λμ 'μ΄ μ νλΈ λλ³Έμ', 'μ΄ μμμ', 'μ΄ μ νλΈλ'λ±μ μκ°μ ννμ μ μΈνλΌ
|
| 75 |
6. μμ½λ¬Έλ§μΌλ‘λ μμμ μ§μ μμ²ν κ²κ³Ό λμΌν μμ€μΌλ‘ λ΄μ©μ μ΄ν΄ν μ μλλ‘ μμΈν μμ±
|
|
@@ -80,7 +82,6 @@ def summarize_text(title, description, text):
|
|
| 80 |
11. λ±μ₯μΈλ¬Ό, μ₯μ, μ¬κ±΄ λ± μ€μν μμλ₯Ό μ ννκ² μμ±
|
| 81 |
12. λλ³Έμμ μ λ¬νλ κ°μ μ΄λ λΆμκΈ°λ ν¬ν¨
|
| 82 |
13. λ°λμ κΈ°μ μ μ©μ΄λ μ λ¬Έ μ©μ΄κ° μμ κ²½μ°, μ΄λ₯Ό μ ννκ² μ¬μ©
|
| 83 |
-
|
| 84 |
14. λ°λμ ν΅μ¬ μΉμ
(μμ£Όμ )λ₯Ό νμ
νμ¬ μΉμ
μ λ§κ² κΈμ μμ½νλΌ(κΈμ μμ κ³ λ €νμ¬ μΉμ
μ κ°μλ₯Ό νλ ₯μ μΌλ‘ μ€μ )
|
| 85 |
15. κ° μΉμ
μ μ λͺ©(μμ£Όμ )μλ λ΄μ©κ³Ό μ΄μΈλ¦¬λ μ μ ν μ΄λͺ¨μ§λ‘ μμ£Όμ λ₯Ό μμνλΌ
|
| 86 |
16. κ° μΉμ
μ λ΄μ©μ Bullet Pointλ₯Ό μ¬μ©νμ¬ κ°λ
μ±μ λμ¬λΌ(λ¬Έμ₯ λ¨μλ‘ κ΅¬λΆ)
|
|
@@ -125,57 +126,72 @@ def split_sentences(text):
|
|
| 125 |
def display_script(title, script):
|
| 126 |
script_sentences = split_sentences(script)
|
| 127 |
formatted_script = "\n\n".join(script_sentences)
|
| 128 |
-
return f"""<div
|
| 129 |
-
<h3>μλ¬Έ μ€ν¬λ¦½νΈ</h3>
|
| 130 |
<details>
|
| 131 |
<summary>ν΄λ¦νμ¬ νΌμΉκΈ°</summary>
|
| 132 |
-
<
|
| 133 |
-
<
|
| 134 |
</details>
|
| 135 |
</div>"""
|
| 136 |
|
| 137 |
def display_summary(title, summary):
|
| 138 |
-
return f"""<div
|
| 139 |
-
<
|
| 140 |
-
<h2>{title}</h2>
|
| 141 |
{summary}
|
| 142 |
</div>"""
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
def analyze(url):
|
| 145 |
# μ€ν¬λ¦½νΈ μΆμΆ
|
| 146 |
-
yield "μ€ν¬λ¦½νΈ μΆμΆ μ€...", "μ€ν¬λ¦½νΈ μΆμΆ μ€..."
|
| 147 |
-
title, description, script = get_youtube_script(url)
|
| 148 |
script_content = display_script(title, script)
|
|
|
|
| 149 |
|
| 150 |
# μλ¬Έ μ€ν¬λ¦½νΈ νμ λ° μμ½ μμ
|
| 151 |
-
yield script_content, "μμ½ μμ± μ€..."
|
| 152 |
|
| 153 |
# μμ½ μμ±
|
| 154 |
summary = summarize_text(title, description, script)
|
| 155 |
|
|
|
|
| 156 |
lines = summary.split('\n')
|
| 157 |
formatted_lines = []
|
| 158 |
for line in lines:
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
formatted_summary = '\n'.join(formatted_lines)
|
| 170 |
|
| 171 |
-
summary_content = f"""<div
|
| 172 |
-
<
|
| 173 |
-
<h2>{html.escape(title)}</h2>
|
| 174 |
{formatted_summary}
|
| 175 |
</div>"""
|
| 176 |
|
| 177 |
# μ΅μ’
κ²°κ³Ό νμ
|
| 178 |
-
yield script_content, summary_content
|
| 179 |
|
| 180 |
# Gradio μΈν°νμ΄μ€
|
| 181 |
with gr.Blocks() as demo:
|
|
@@ -184,11 +200,12 @@ with gr.Blocks() as demo:
|
|
| 184 |
analyze_button = gr.Button("λΆμνκΈ°")
|
| 185 |
script_output = gr.HTML(label="μλ¬Έ μ€ν¬λ¦½νΈ")
|
| 186 |
summary_output = gr.HTML(label="μμ½")
|
|
|
|
| 187 |
|
| 188 |
analyze_button.click(
|
| 189 |
analyze,
|
| 190 |
inputs=[youtube_url_input],
|
| 191 |
-
outputs=[script_output, summary_output]
|
| 192 |
)
|
| 193 |
|
| 194 |
if __name__ == "__main__":
|
|
|
|
| 40 |
title = data.get("title", "μ λͺ© μμ")
|
| 41 |
description = data.get("description", "μ€λͺ
μμ")
|
| 42 |
transcription_text = data.get("transcriptionAsText", "")
|
| 43 |
+
thumbnails = data.get("thumbnails", [])
|
| 44 |
|
| 45 |
if not transcription_text:
|
| 46 |
raise ValueError("μΆμΆλ μ€ν¬λ¦½νΈκ° μμ΅λλ€.")
|
| 47 |
|
| 48 |
logging.info("μ€ν¬λ¦½νΈ μΆμΆ μλ£")
|
| 49 |
+
return title, description, transcription_text, thumbnails
|
| 50 |
except Exception as e:
|
| 51 |
logging.exception("μ€ν¬λ¦½νΈ μΆμΆ μ€ μ€λ₯ λ°μ")
|
| 52 |
raise
|
|
|
|
| 70 |
[μ νλΈ μμ½ κ·μΉ]
|
| 71 |
1. λλ μ νλΈ μμ μ λ¬Έ ν΄μ€κ°λ‘μ μ§μΉ¨μ λ§κ² μ΄ κΈμ μμ±νλΌ
|
| 72 |
2. μλμ μ λͺ©κ³Ό μ€λͺ
μ μ΄ μ νλΈ μμμ μλ³Έ λ©νλ°μ΄ν°μ΄λ€.
|
| 73 |
+
3. λ°λμ μ λͺ©κ³Ό μ€λͺ
μΌλ‘ μ£Όμ μ λ¬Έλ§₯, μ² μ(Spelling)μ λ¨Όμ νμ
νκ³ , μλμ λλ³Έμ λ°λμ μ§μΉ¨μ λ§κ² μμΈνκ² μμ½νλΌ
|
| 74 |
+
- λ°λμ μ£Όμ΄μ§ μ λͺ©, μ€λͺ
μ μλ μ² μ(Spelling)λ₯Ό μμ½μ λ°μνλΌ(μλ¬Έ λλ³Έμλ μ€νμκ° μμ μ μλ€)
|
| 75 |
4. λ°λμ νκΈλ‘ μμ±νλΌ
|
| 76 |
5. λ°λμ 'μ΄ μ νλΈ λλ³Έμ', 'μ΄ μμμ', 'μ΄ μ νλΈλ'λ±μ μκ°μ ννμ μ μΈνλΌ
|
| 77 |
6. μμ½λ¬Έλ§μΌλ‘λ μμμ μ§μ μμ²ν κ²κ³Ό λμΌν μμ€μΌλ‘ λ΄μ©μ μ΄ν΄ν μ μλλ‘ μμΈν μμ±
|
|
|
|
| 82 |
11. λ±μ₯μΈλ¬Ό, μ₯μ, μ¬κ±΄ λ± μ€μν μμλ₯Ό μ ννκ² μμ±
|
| 83 |
12. λλ³Έμμ μ λ¬νλ κ°μ μ΄λ λΆμκΈ°λ ν¬ν¨
|
| 84 |
13. λ°λμ κΈ°μ μ μ©μ΄λ μ λ¬Έ μ©μ΄κ° μμ κ²½μ°, μ΄λ₯Ό μ ννκ² μ¬μ©
|
|
|
|
| 85 |
14. λ°λμ ν΅μ¬ μΉμ
(μμ£Όμ )λ₯Ό νμ
νμ¬ μΉμ
μ λ§κ² κΈμ μμ½νλΌ(κΈμ μμ κ³ λ €νμ¬ μΉμ
μ κ°μλ₯Ό νλ ₯μ μΌλ‘ μ€μ )
|
| 86 |
15. κ° μΉμ
μ μ λͺ©(μμ£Όμ )μλ λ΄μ©κ³Ό μ΄μΈλ¦¬λ μ μ ν μ΄λͺ¨μ§λ‘ μμ£Όμ λ₯Ό μμνλΌ
|
| 87 |
16. κ° μΉμ
μ λ΄μ©μ Bullet Pointλ₯Ό μ¬μ©νμ¬ κ°λ
μ±μ λμ¬λΌ(λ¬Έμ₯ λ¨μλ‘ κ΅¬λΆ)
|
|
|
|
| 126 |
def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The transcript is split with ``split_sentences`` and the pieces are
    joined with blank lines; the ``white-space: pre-wrap`` style on the
    paragraph keeps those line breaks visible when rendered.

    Args:
        title: Video title shown inside the collapsible section.
        script: Raw transcript text.

    Returns:
        An HTML string: a ``script-box`` div wrapping a ``<details>`` element.
    """
    script_sentences = split_sentences(script)
    formatted_script = "\n\n".join(script_sentences)
    # Both values originate from fetched video data, so they must be escaped
    # before being interpolated into markup; otherwise a "<" or "&" in the
    # title/transcript breaks the layout or injects arbitrary HTML.
    safe_title = html.escape(str(title))
    safe_script = html.escape(formatted_script)
    return f"""<div class="script-box">
<details>
<summary>ν΄λ¦νμ¬ νΌμΉκΈ°</summary>
<div class="output-title">{safe_title}</div>
<p style="white-space: pre-wrap;">{safe_script}</p>
</details>
</div>"""
|
| 136 |
|
| 137 |
def display_summary(title, summary):
    """Wrap an already-rendered summary in the shared ``script-box`` markup.

    Args:
        title: Video title; treated as plain text and HTML-escaped.
        summary: Summary body, inserted verbatim (expected to be HTML
            already, so it is deliberately not escaped here).

    Returns:
        An HTML string with the title in an ``output-title`` div followed by
        the summary body.
    """
    # The title is plain text from video metadata; escape it so that
    # characters such as "<" or "&" cannot alter the surrounding markup.
    safe_title = html.escape(str(title))
    return f"""<div class="script-box">
<div class="output-title">{safe_title}</div>
{summary}
</div>"""
|
| 142 |
|
| 143 |
+
def get_thumbnail_url(thumbnails):
    """Return the URL of the first 640x480 thumbnail.

    Args:
        thumbnails: Iterable of dicts that may carry ``width``, ``height``
            and ``url`` keys. ``None`` is accepted and treated as empty.

    Returns:
        The ``url`` value of the first entry whose width is 640 and height
        is 480, or a fixed human-readable message when no entry matches.
    """
    # A JSON null for the thumbnails field reaches this function as None
    # (dict.get only falls back to its default when the key is absent), so
    # normalise it instead of failing on iteration.
    for thumbnail in thumbnails or []:
        if thumbnail.get("width") == 640 and thumbnail.get("height") == 480:
            return thumbnail.get("url")
    return "640x480 ν¬κΈ°μ μΈλ€μΌμ μ°Ύμ μ μμ΅λλ€."
|
| 148 |
+
|
| 149 |
def _summary_line_to_html(raw_line):
    """Convert one line of the Markdown-like summary into an HTML fragment.

    Blank lines become ``<br>``; lines starting with ``#`` become headings
    (one to four leading ``#`` map to ``<h1>``..``<h4>``, more than four are
    treated as ``<h4>``); lines starting with ``- `` become ``<li>``; every
    other line becomes ``<p>``. Text is HTML-escaped, and ``**bold**`` spans
    in list items and paragraphs are turned into ``<strong>``.
    """
    text = raw_line.strip()
    if not text:
        return "<br>"

    hash_count = len(text) - len(text.lstrip('#'))
    if hash_count:
        level = min(hash_count, 4)
        heading = html.escape(text[level:].strip())
        return f"<h{level}>{heading}</h{level}>"

    if text.startswith('- '):
        tag, body = "li", text[2:]
    else:
        tag, body = "p", text
    # Escape first, then add markup, so only our own <strong> tags survive.
    emphasized = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html.escape(body))
    return f"<{tag}>{emphasized}</{tag}>"


def analyze(url):
    """Fetch, display and summarise a video transcript, yielding progress.

    This is a generator: each yielded 3-tuple is
    ``(script_html, summary_html_or_status, thumbnail_text)``.

    Yields:
        1. Status placeholders while the transcript is being fetched.
        2. The rendered transcript, a "summarising" status and the thumbnail
           lookup result.
        3. The rendered transcript, the rendered summary and the thumbnail
           lookup result.
    """
    # Step 1: announce that extraction has started, then fetch the data.
    extracting_status = "μ€ν¬λ¦½νΈ μΆμΆ μ€..."
    yield extracting_status, extracting_status, ""
    title, description, script, thumbnails = get_youtube_script(url)
    script_content = display_script(title, script)
    thumbnail_url = get_thumbnail_url(thumbnails)

    # Step 2: show the transcript immediately while the summary is produced.
    yield script_content, "μμ½ μμ± μ€...", thumbnail_url

    # Step 3: build the summary and convert it line by line to HTML.
    summary = summarize_text(title, description, script)
    formatted_summary = '\n'.join(
        _summary_line_to_html(piece) for piece in summary.split('\n')
    )
    summary_content = '\n'.join([
        '<div class="script-box">',
        f'<div class="output-title">{html.escape(title)}</div>',
        formatted_summary,
        '</div>',
    ])

    # Step 4: final result.
    yield script_content, summary_content, thumbnail_url
|
| 195 |
|
| 196 |
# Gradio μΈν°νμ΄μ€
|
| 197 |
with gr.Blocks() as demo:
|
|
|
|
| 200 |
analyze_button = gr.Button("λΆμνκΈ°")
|
| 201 |
script_output = gr.HTML(label="μλ¬Έ μ€ν¬λ¦½νΈ")
|
| 202 |
summary_output = gr.HTML(label="μμ½")
|
| 203 |
+
thumbnail_output = gr.Textbox(label="μΈλ€μΌ URL (640x480)") # μ΄ μ€ μΆκ°
|
| 204 |
|
| 205 |
analyze_button.click(
|
| 206 |
analyze,
|
| 207 |
inputs=[youtube_url_input],
|
| 208 |
+
outputs=[script_output, summary_output, thumbnail_output] # thumbnail_output μΆκ°
|
| 209 |
)
|
| 210 |
|
| 211 |
if __name__ == "__main__":
|