import json
import logging
import os
import re

import gradio as gr
import requests
|
|
| |
# RapidAPI credentials: the key is read from the environment (None when the
# AA_KEY variable is unset); the host identifies the transcript API.
AA_KEY = os.environ.get("AA_KEY")
AA_HOST = "youtube-transcriptor.p.rapidapi.com"

# Transcript language codes, listed from most to least preferred.
LANGUAGE_PRIORITY = ["ko", "en", "ja", "zh"]
|
|
| |
# Patterns are tried in order; each one captures the video ID in group 1.
# The ID stops at the first URL delimiter (#, &, ?, /) or whitespace, and must
# be non-empty so that e.g. "watch?v=" is rejected instead of yielding "".
_VIDEO_ID_PATTERNS = (
    re.compile(r"v=([^#&?/\s]+)"),                 # .../watch?v=<id>
    re.compile(r"youtu\.be/([^#&?/\s]+)"),         # https://youtu.be/<id>
    re.compile(r"shorts/([^#&?/\s]+)"),            # .../shorts/<id>
    re.compile(r"/(?:embed|live)/([^#&?/\s]+)"),   # .../embed/<id>, .../live/<id>
)


def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognizes ``watch?v=<id>``, ``youtu.be/<id>``, ``shorts/<id>``,
    ``/embed/<id>`` and ``/live/<id>`` forms. Leading and trailing whitespace
    in the input is ignored.

    Args:
        youtube_url: The URL text to inspect.

    Returns:
        The video ID as a string, or ``None`` when the input is not a string
        or contains no non-empty video ID.
    """
    if not isinstance(youtube_url, str):
        return None

    url = youtube_url.strip()
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
|
|
| |
| def get_youtube_transcript(youtube_url): |
| try: |
| |
| video_id = get_video_id(youtube_url) |
| if video_id is None: |
| return {"error": "μλͺ»λ μ νλΈ URLμ
λλ€."} |
| |
| url = "https://youtube-transcriptor.p.rapidapi.com/transcript" |
| headers = { |
| "x-rapidapi-key": AA_KEY, |
| "x-rapidapi-host": AA_HOST |
| } |
| |
| |
| for lang in LANGUAGE_PRIORITY: |
| querystring = {"video_id": video_id, "lang": lang} |
| response = requests.get(url, headers=headers, params=querystring) |
| |
| if response.status_code == 200: |
| data = response.json() |
| if data and not isinstance(data, str) and "error" not in data: |
| return {"language": lang, "data": data} |
| if isinstance(data, dict) and "availableLangs" in data: |
| available_langs = data["availableLangs"] |
| |
| for available_lang in available_langs: |
| querystring = {"video_id": video_id, "lang": available_lang} |
| response = requests.get(url, headers=headers, params=querystring) |
| if response.status_code == 200: |
| data = response.json() |
| if data and not isinstance(data, str) and "error" not in data: |
| return {"language": available_lang, "data": data} |
| |
| |
| return {"error": "μλ§μ μ°Ύμ μ μμ΅λλ€."} |
| |
| except Exception as e: |
| return {"error": "μλ§μ λΆλ¬μ€λλ° μ€ν¨νμ΅λλ€."} |
|
|
| |
def youtube_transcript_interface(youtube_url):
    """Gradio callback: look up a video's transcript and format it as plain text.

    Args:
        youtube_url: URL text entered by the user.

    Returns:
        The error message from ``get_youtube_transcript`` when the lookup
        failed; otherwise a string with the title and transcript text taken
        from the first item of the returned payload. If the payload does not
        have the expected shape, a generic processing-error message is
        returned instead of raising.
    """
    transcript_data = get_youtube_transcript(youtube_url)

    if 'error' in transcript_data:
        return transcript_data['error']

    try:
        first_item = transcript_data['data'][0]
        # `or` also covers keys that are present but null/empty, which would
        # otherwise be rendered as the literal text "None".
        title = first_item.get('title') or '제목 없음'  # "No title"
        transcription = (
            first_item.get('transcriptionAsText') or '자막이 없습니다.'  # "No subtitles."
        )
    except (IndexError, KeyError, TypeError, AttributeError):
        # AttributeError: the first item is not a dict and has no .get().
        # "An error occurred while processing the subtitle data."
        return "자막 데이터를 처리하는 중 오류가 발생했습니다."

    # "Title: ... / Subtitles: ..."
    return f"제목: {title}\n\n자막:\n{transcription}"
|
|
| |
| interface = gr.Interface( |
| fn=youtube_transcript_interface, |
| inputs="text", |
| outputs="text", |
| title="YouTube μλ§ μΆμΆκΈ°", |
| description="μ νλΈ URLμ μ
λ ₯νμΈμ." |
| ) |
|
|
| |
| interface.launch() |
|
|