yoon2566 committed on
Commit
e2db192
·
verified ·
1 Parent(s): 38b78b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -22
app.py CHANGED
@@ -1,29 +1,48 @@
1
-
2
  import gradio as gr
3
  from youtube_transcript_api import YouTubeTranscriptApi
 
4
 
5
- def extract_script(url):
6
- # '=' ๊ธฐํ˜ธ ์ดํ›„ ํ…์ŠคํŠธ ์ถ”์ถœ
7
- try:
8
- video_id = url.split('=')[-1]
9
-
10
- # ์ž๋ง‰(์Šคํฌ๋ฆฝํŠธ) ๊ฐ€์ ธ์˜ค๊ธฐ
11
- srt = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
 
12
 
13
- # ์ž๋ง‰ ํ…์ŠคํŠธ ํ•ฉ์น˜๊ธฐ
14
- script = "\n".join([i["text"] for i in srt])
15
- return script
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  except Exception as e:
17
- return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}"
18
-
19
# Define the Gradio interface: a URL box, an output box and a button that
# runs extract_script on the entered URL.
with gr.Blocks() as app:
    gr.Markdown("## 유튜브 스크립트 추출기")
    url_input = gr.Textbox(
        label="유튜브 URL",
        placeholder="예제 https://www.youtube.com/watch?v=HtMVLneHZR0",
    )
    output = gr.Textbox(label="스크립트 출력")
    extract_button = gr.Button("스크립트 추출")

    # Event listeners are registered while the Blocks context is open.
    extract_button.click(extract_script, inputs=url_input, outputs=output)

# Start the web app.
app.launch()
 
 
 
1
  import gradio as gr
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
+ import re
4
 
5
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognises ``youtube.com/watch`` URLs (the ``v`` parameter may appear
    anywhere in the query string), ``youtu.be`` short links, and
    ``youtube.com`` ``/shorts/``, ``/embed/`` and ``/live/`` paths.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when *url* is not a string or no
        complete 11-character ID follows a recognised marker.
    """
    if not isinstance(url, str):
        return None

    # IDs are taken to be 11 characters of [A-Za-z0-9_-]; requiring all 11
    # avoids returning a truncated fragment for malformed links.
    # The lazy optional group lets `v=` sit after other query parameters
    # while still preferring the first `v=` in the query string.
    match = re.search(
        r"(?:youtube\.com/watch\?(?:[^#\s]*?&)??v="
        r"|youtu\.be/"
        r"|youtube\.com/(?:shorts|embed|live)/)"
        r"([A-Za-z0-9_-]{11})",
        url,
    )
    return match.group(1) if match else None
13
 
14
+ def get_transcript(url):
15
+ """YouTube ์˜์ƒ์˜ ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์ถ”์ถœํ•˜๋Š” ํ•จ์ˆ˜"""
16
+ try:
17
+ video_id = extract_video_id(url)
18
+ if not video_id:
19
+ return "์˜ฌ๋ฐ”๋ฅธ YouTube URL์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
20
+
21
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko', 'en'])
22
+
23
+ # ์ „์ฒด ์Šคํฌ๋ฆฝํŠธ ํ…์ŠคํŠธ ๊ตฌ์„ฑ
24
+ full_transcript = ""
25
+ for transcript in transcript_list:
26
+ text = transcript['text']
27
+ timestamp = transcript['start']
28
+ minutes = int(timestamp // 60)
29
+ seconds = int(timestamp % 60)
30
+ full_transcript += f"[{minutes:02d}:{seconds:02d}] {text}\n"
31
+
32
+ return full_transcript
33
+
34
  except Exception as e:
35
+ return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
 
 
 
 
 
 
 
36
 
37
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
38
+ iface = gr.Interface(
39
+ fn=get_transcript,
40
+ inputs=gr.Textbox(label="YouTube URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
41
+ outputs=gr.Textbox(label="์ถ”์ถœ๋œ ์Šคํฌ๋ฆฝํŠธ", lines=10),
42
+ title="YouTube ์Šคํฌ๋ฆฝํŠธ ์ถ”์ถœ๊ธฐ",
43
+ description="YouTube ์˜์ƒ์˜ URL์„ ์ž…๋ ฅํ•˜๋ฉด ์ž๋™์œผ๋กœ ์Šคํฌ๋ฆฝํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค."
44
+ )
45
 
46
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹คํ–‰
47
+ if __name__ == "__main__":
48
+ iface.launch()