Spaces:

AIRider
/

YT_Script_Shorts

Paused

App Files Files Community

YT_Script_Shorts / app.py

AIRider

Update app.py

85efe2c verified over 1 year ago

raw

history blame

6.1 kB

	import gradio as gr
	from gradio_client import Client
	import json
	import logging
	import openai
	import os
	import re

	# Logging configuration: DEBUG and above are written to a local log file.
	logging.basicConfig(
	    filename='youtube_script_extractor.log',
	    level=logging.DEBUG,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# The OpenAI API key is read from the environment (None when unset).
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Sentence segmentation helpers.

	# Korean sentence-final endings that mark a sentence boundary.
	_SENTENCE_ENDINGS = (
	    "니다", "에요", "구나", "해요", "군요", "겠어요", "시오", "해라", "예요", "아요",
	    "데요", "대요", "세요", "어요", "게요", "구요", "고요", "나요", "하죠",
	)
	_TERMINAL_PUNCTUATION = ('.', '?', '!')

	# One capturing group so re.split() keeps the ending in its result. The
	# endings are real alternatives joined with "|" (an escaped "\|" would only
	# match a literal pipe character). An ending only counts when it is not
	# followed by a word character, and any punctuation right after it is kept
	# with the sentence it closes instead of leaking into the next one.
	_SENTENCE_END_RE = re.compile(
	    "((?:" + "|".join(_SENTENCE_ENDINGS) + r")(?!\w)[.?!]*)"
	)


	def split_sentences(text, max_length=100):
	    """Split a transcript into display chunks on Korean sentence endings.

	    Sentences are detected by their final ending (for example "니다" or
	    "세요"). Consecutive sentences are merged into one chunk until adding
	    another would exceed ``max_length`` characters; a sentence that ends with
	    ".", "?" or "!" always closes the current chunk.

	    Args:
	        text: The raw transcript. An empty or ``None`` value yields ``[]``.
	        max_length: Soft upper bound on the length of a merged chunk. A
	            single sentence longer than this is kept whole.

	    Returns:
	        A list of stripped, non-empty chunks in their original order.
	    """
	    if not text:
	        return []

	    pieces = _SENTENCE_END_RE.split(text)
	    # re.split() returns text, ending, text, ..., text; pad with an empty
	    # ending so the trailing text pairs up like every other sentence.
	    pieces.append("")

	    chunks = []
	    current = ""
	    for body, ending in zip(pieces[::2], pieces[1::2]):
	        sentence = body + ending
	        if len(current) + len(sentence) > max_length:
	            # The chunk is full: emit it (unless empty) and start a new one.
	            finished = current.strip()
	            if finished:
	                chunks.append(finished)
	            current = sentence.strip()
	        else:
	            current += sentence

	        # Explicit terminal punctuation closes the chunk immediately.
	        if sentence.rstrip().endswith(_TERMINAL_PUNCTUATION):
	            finished = current.strip()
	            if finished:
	                chunks.append(finished)
	            current = ""

	    remainder = current.strip()
	    if remainder:
	        chunks.append(remainder)
	    return chunks

	def parse_api_response(response):
	    """Normalise a raw API response into a dictionary.

	    A string is decoded as JSON first. A non-empty list is reduced to its
	    first element. Whatever remains must be a dict.

	    Args:
	        response: The raw response (JSON string, list or dict).

	    Returns:
	        The response as a dict.

	    Raises:
	        ValueError: If decoding fails or the result is not a dict. Every
	            failure is logged and re-raised as ValueError with a prefixed
	            message.
	    """
	    try:
	        payload = json.loads(response) if isinstance(response, str) else response
	        if isinstance(payload, list) and payload:
	            payload = payload[0]
	        if isinstance(payload, dict):
	            return payload
	        raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(payload)}")
	    except Exception as err:
	        message = f"API 응답 파싱 실패: {str(err)}"
	        logging.error(message)
	        raise ValueError(message)

	def get_youtube_script(url):
	    """Fetch the title, description and transcript text for a YouTube URL.

	    Calls the "/predict" endpoint of the "whispersound/YT_Ts_R" Space
	    through gradio_client and reads the first entry of the response's
	    "data" list.

	    Args:
	        url: The YouTube video URL, passed to the endpoint as ``youtube_url``.

	    Returns:
	        A ``(title, description, transcription_text)`` tuple. Title and
	        description fall back to placeholder strings when the keys are
	        missing.

	    Raises:
	        ValueError: If the response has no usable "data" or the transcript
	            text is empty.
	        Exception: Any other failure is logged with its traceback and
	            re-raised unchanged.
	    """
	    logging.info(f"스크립트 추출 시작: URL = {url}")
	    client = Client("whispersound/YT_Ts_R")
	    try:
	        raw_result = client.predict(youtube_url=url, api_name="/predict")
	        payload = parse_api_response(raw_result)

	        entries = payload.get('data')
	        if not entries:
	            raise ValueError("API 응답에 유효한 데이터가 없습니다.")

	        # Only the first entry of the "data" list is used.
	        video = entries[0]
	        transcription_text = video.get("transcriptionAsText", "")
	        if not transcription_text:
	            raise ValueError("추출된 스크립트가 없습니다.")

	        title = video.get("title", "제목 없음")
	        description = video.get("description", "설명 없음")

	        logging.info("스크립트 추출 완료")
	        return title, description, transcription_text
	    except Exception:
	        logging.exception("스크립트 추출 중 오류 발생")
	        raise

	def call_api(prompt, max_tokens, temperature, top_p):
	    """Send a single-turn user prompt to the chat model and return its reply.

	    Args:
	        prompt: The text sent as the only user message.
	        max_tokens: Forwarded to the API as ``max_tokens``.
	        temperature: Forwarded to the API as ``temperature``.
	        top_p: Forwarded to the API as ``top_p``.

	    Returns:
	        The message content of the first choice in the response.

	    Raises:
	        Exception: Any failure is logged with its traceback and re-raised.
	    """
	    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 openai-python
	    # interface; confirm the package version pinned for this app.
	    request = {
	        "model": "gpt-4o-mini",
	        "messages": [{"role": "user", "content": prompt}],
	        "max_tokens": max_tokens,
	        "temperature": temperature,
	        "top_p": top_p,
	    }
	    try:
	        completion = openai.ChatCompletion.create(**request)
	        first_choice = completion['choices'][0]
	        return first_choice['message']['content']
	    except Exception:
	        logging.exception("LLM API 호출 중 오류 발생")
	        raise

	def summarize_text(title, description, text):
	    """Ask the LLM for a detailed Korean summary of a video transcript.

	    The title and description are embedded in the prompt as context,
	    followed by a numbered list of summarisation rules and the transcript.

	    Args:
	        title: The video title.
	        description: The video description.
	        text: The transcript to summarise.

	    Returns:
	        The value returned by ``call_api`` for the assembled prompt.
	    """
	    prompt = f"""
	제목: {title}
	설명: {description}

	위의 제목과 설명은 이 유튜브 영상의 원본 메타데이터입니다. 이를 참고하여 아래의 대본을 요약해주세요.

	1. 위의 제목과 설명을 참고하여 유튜브 대본의 핵심 주제와 모든 주요 내용을 상세하게 요약하라
	2. 반드시 한글로 작성하라
	3. 요약문만으로도 영상을 직접 시청한 것과 동일한 수준으로 내용을 이해할 수 있도록 상세히 작성
	4. 글을 너무 압축하거나 함축하지 말고, 중요한 내용과 세부사항을 모두 포함
	5. 반드시 대본의 흐름과 논리 구조를 유지
	6. 반드시 시간 순서나 사건의 전개 과정을 명확하게 반영
	7. 등장인물, 장소, 사건 등 중요한 요소를 정확하게 작성
	8. 대본에서 전달하는 감정이나 분위기도 포함
	9. 반드시 기술적 용어나 전문 용어가 있을 경우, 이를 정확하게 사용
	10. 대본의 목적이나 의도를 파악하고, 이를 요약에 반드시 반영
	11. 각 문장을 명확하게 구분하고, 적절한 단락 구분을 사용하여 가독성을 높이시오

	대본:
	{text}
	"""
	    # Fixed generation settings for summarisation.
	    summary = call_api(prompt, max_tokens=2000, temperature=0.3, top_p=0.9)
	    return summary

	def analyze(url, progress=gr.Progress()):
	    """Run the full pipeline for one YouTube URL: extract, segment, summarise.

	    This is the click handler for the analyse button, whose outputs are
	    declared positionally as (title textbox, script pane, summary pane).
	    Gradio only accepts a returned dict when its keys are components, so the
	    results are returned as a tuple in that same order.

	    Args:
	        url: The YouTube video URL entered by the user.
	        progress: Gradio progress tracker; it is called with fractions in
	            the 0-1 range, which is what Gradio documents for float values.

	    Returns:
	        ``(title, script, summary)`` on success, where ``script`` is the
	        transcript split into chunks joined by newlines. On failure the
	        error is logged and ``("오류", error_message, error_message)`` is
	        returned so the message is visible in both panes.
	    """
	    try:
	        progress(0, desc="스크립트 추출 중...")
	        title, description, script = get_youtube_script(url)

	        progress(0.33, desc="원문 스크립트 처리 중...")
	        script_content = "\n".join(split_sentences(script))

	        progress(0.66, desc="요약 생성 중...")
	        # The summary is generated from the unsegmented transcript.
	        summary = summarize_text(title, description, script)

	        progress(1.0, desc="완료")
	        return title, script_content, summary
	    except Exception as e:
	        # Report the failure in the UI instead of letting the handler raise.
	        error_msg = f"처리 중 오류 발생: {str(e)}"
	        logging.exception(error_msg)
	        return "오류", error_msg, error_msg

	# Gradio interface
	with gr.Blocks() as demo:
	    gr.Markdown(value="## YouTube 스크립트 추출 및 요약 도구")
	    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
	    analyze_button = gr.Button(value="분석하기")

	    # One tab for the segmented transcript, one for the generated summary.
	    with gr.Tabs():
	        with gr.TabItem("원문 스크립트"):
	            script_output = gr.Markdown()
	        with gr.TabItem("요약"):
	            summary_output = gr.Markdown()

	    title_output = gr.Textbox(label="영상 제목")

	    # Clicking the button runs analyze() on the entered URL.
	    analyze_button.click(
	        fn=analyze,
	        inputs=[youtube_url_input],
	        outputs=[title_output, script_output, summary_output],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()