# Source: Hugging Face Space "you" / app.py
# Uploaded by: Kims12
# Commit: "Update app.py" (a558f50, verified)
import os
import requests
import json
import gradio as gr
import re
# RapidAPI credentials: the key is read from the AA_KEY environment variable
# (os.getenv yields None when it is unset); the API host is fixed.
AA_KEY = os.getenv("AA_KEY")
AA_HOST = "youtube-transcriptor.p.rapidapi.com"
# Subtitle language codes, listed in the order they are tried.
LANGUAGE_PRIORITY = ['ko', 'en', 'ja', 'zh']
# Patterns are tried in order: watch URLs (?v= / &v=), youtu.be short links,
# then path-style URLs (Shorts, embed, live). Each captures the ID in group 1.
_VIDEO_ID_PATTERNS = (
    re.compile(r"(?:^|[?&])v=([^#&?/\s]+)"),
    re.compile(r"youtu\.be/([^#&?/\s]+)"),
    re.compile(r"(?:^|/)(?:shorts|embed|live)/([^#&?/\s]+)"),
)


def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Supports ``watch?v=<id>`` URLs, ``youtu.be/<id>`` short links and
    ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>`` paths.

    Args:
        youtube_url: The URL text entered by the user.

    Returns:
        The video ID as a non-empty string, or ``None`` when the input is
        not a string or no ID can be found (including an empty ``v=``).
    """
    if not isinstance(youtube_url, str):
        return None

    candidate = youtube_url.strip()
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(candidate)
        if match:
            # The capture group requires at least one character, so an
            # empty ID is never returned.
            return match.group(1)
    return None
def _fetch_transcript(video_id, lang, timeout=15):
    """Request one transcript from the RapidAPI endpoint; return parsed JSON or None."""
    response = requests.get(
        f"https://{AA_HOST}/transcript",
        headers={"x-rapidapi-key": AA_KEY, "x-rapidapi-host": AA_HOST},
        params={"video_id": video_id, "lang": lang},
        # Without a timeout a stalled connection would block the UI forever.
        timeout=timeout,
    )
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # A 200 response with a non-JSON body counts as "no transcript" for
        # this language so the remaining languages are still tried.
        return None


def _is_transcript(data):
    """Return True when *data* is a non-empty list/dict without an "error" key."""
    return isinstance(data, (list, dict)) and bool(data) and "error" not in data


def get_youtube_transcript(youtube_url):
    """Fetch a transcript for a YouTube URL, trying languages by priority.

    Each language in ``LANGUAGE_PRIORITY`` is requested in order. When a
    response is a dict carrying ``availableLangs``, those languages are
    tried as fallbacks. No language is requested more than once.

    Args:
        youtube_url: URL of the video, as entered by the user.

    Returns:
        ``{"language": <code>, "data": <payload>}`` on success, otherwise
        ``{"error": <message>}``. This function never raises.
    """
    try:
        video_id = get_video_id(youtube_url)
        # Falsy covers both None and an empty ID.
        if not video_id:
            return {"error": "잘못된 유튜브 URL입니다."}

        # A list rather than a set: entries of availableLangs are presumably
        # language-code strings, but this avoids assuming they are hashable.
        attempted = []

        # 1. Try the preferred languages in order.
        for lang in LANGUAGE_PRIORITY:
            if lang in attempted:
                continue
            attempted.append(lang)
            data = _fetch_transcript(video_id, lang)
            if _is_transcript(data):
                return {"language": lang, "data": data}

            # 2. Fall back to the languages the API reports as available.
            if isinstance(data, dict) and "availableLangs" in data:
                for available_lang in data["availableLangs"] or []:
                    if available_lang in attempted:
                        continue
                    attempted.append(available_lang)
                    fallback = _fetch_transcript(video_id, available_lang)
                    if _is_transcript(fallback):
                        return {"language": available_lang, "data": fallback}

        # 3. Every attempt failed.
        return {"error": "자막을 찾을 수 없습니다."}
    except Exception:
        # Deliberate catch-all at the UI boundary: callers expect a dict with
        # an "error" key (network failures, timeouts, ...) and never an
        # exception.
        return {"error": "자막을 불러오는데 실패했습니다."}
def youtube_transcript_interface(youtube_url):
    """Gradio handler: turn a YouTube URL into a formatted title + transcript.

    Args:
        youtube_url: URL text entered in the Gradio textbox.

    Returns:
        The formatted title and transcript, or a user-facing error message
        when the lookup failed or the payload has an unexpected shape.
    """
    transcript_data = get_youtube_transcript(youtube_url)
    if 'error' in transcript_data:
        return transcript_data['error']

    processing_error = "자막 데이터를 처리하는 중 오류가 발생했습니다."
    try:
        # The payload is indexed as a sequence; only the first item is used.
        entry = transcript_data['data'][0]
    except (IndexError, KeyError, TypeError):
        return processing_error

    # A non-dict first item has no .get(); report it instead of crashing
    # the handler with AttributeError.
    if not isinstance(entry, dict):
        return processing_error

    title = entry.get('title', '제목 없음')
    transcription = entry.get('transcriptionAsText', '자막이 없습니다.')
    return f"제목: {title}\n\n자막:\n{transcription}"
# Build the Gradio UI: one text input (the YouTube URL) and one text output
# (the formatted transcript or an error message).
interface = gr.Interface(
    fn=youtube_transcript_interface,
    inputs="text",
    outputs="text",
    title="YouTube 자막 추출기",
    description="유튜브 URL을 입력하세요.",
)
# Start the Gradio app when the module is loaded.
interface.launch()