Spaces:
Sleeping
Sleeping
File size: 3,419 Bytes
64284f9 ae477d3 64284f9 ae477d3 014002f ae477d3 64284f9 ae477d3 64284f9 ae477d3 64284f9 ae477d3 014002f ae477d3 014002f 64284f9 ae477d3 64284f9 ae477d3 64284f9 ae477d3 64284f9 ae477d3 64284f9 014002f ae477d3 64284f9 ae477d3 014002f ae477d3 014002f ae477d3 64284f9 ae477d3 64284f9 ae477d3 014002f 64284f9 ae477d3 64284f9 014002f 64284f9 ae477d3 014002f 64284f9 ae477d3 014002f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import openai
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
def get_transcript(video_id):
    """Fetch the transcript text of a YouTube video and its language code.

    A manually created transcript is preferred; if the video has none, the
    first auto-generated transcript is used instead.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A ``(full_transcript, language_code)`` tuple: the text of every
        transcript part joined with single spaces, and the language code of
        the transcript that was selected.

    Raises:
        Exception: With the message "No suitable transcript found." when the
            video offers neither a manual nor an auto-generated transcript.
            Errors raised while listing or fetching transcripts propagate
            unchanged.
    """
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

    # Select by inspecting ``is_generated`` instead of calling
    # ``find_manually_created_transcript()`` with no arguments: that method
    # expects a list of language codes, so the argument-less call always
    # failed and the manual transcript was never actually chosen.
    available = list(transcript_list)
    transcript = next((t for t in available if not t.is_generated), None)
    if transcript is None:
        transcript = next((t for t in available if t.is_generated), None)
    if transcript is None:
        raise Exception("No suitable transcript found.")

    language_code = transcript.language_code  # Language of the chosen transcript
    full_transcript = " ".join(part['text'] for part in transcript.fetch())
    return full_transcript, language_code
def summarize_with_langchain_and_openai(transcript, language_code, openai_api_key, model_name='gpt-3.5-turbo', max_chunks=4):
    """Summarize a transcript with an OpenAI chat model.

    The transcript is split into chunks of up to 2000 characters and only the
    first ``max_chunks`` chunks are sent to the model, which bounds the size
    of the prompt for long videos.

    Args:
        transcript: Full transcript text to summarize.
        language_code: Language code inserted into the prompt so the model
            answers in the transcript's language.
        openai_api_key: API key used to authenticate with OpenAI.
        model_name: Chat model to use.
        max_chunks: Number of leading chunks to include in the prompt
            (``None`` includes every chunk).

    Returns:
        The text content of the first completion choice.
    """
    # Split the document and keep only the leading chunks if it's too long
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
    chunks = splitter.split_text(transcript)
    text_to_summarize = " ".join(chunks[:max_chunks])

    # Prepare the prompt for summarization
    system_prompt = 'I want you to act as a Life Coach that can create good summaries!'
    prompt = (
        f'Summarize the following text in {language_code}.\n'
        f'Text: {text_to_summarize}\n'
        f'Add a title to the summary in {language_code}.\n'
        f'Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'
    )
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': prompt},
    ]

    # openai>=1.0 removed ``openai.ChatCompletion``; use the client API when
    # it is available. A per-call client also avoids storing the user's key
    # in module-global state.
    if hasattr(openai, 'OpenAI'):
        client = openai.OpenAI(api_key=openai_api_key)
        response = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=1,
        )
        return response.choices[0].message.content

    # Legacy SDK (openai<1.0): module-level key and ChatCompletion endpoint.
    openai.api_key = openai_api_key
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=1,
    )
    return response['choices'][0]['message']['content']
def main():
    """Render the Streamlit page and run the summarization on demand.

    Shows inputs for the OpenAI API key and the YouTube video ID. Once the
    'Start' button is pressed and both fields are filled in, the transcript
    is loaded and summarized while a progress bar and status line are
    updated; any error is written to the page as plain text.
    """
    st.title('YouTube Video Summarizer')
    openai_api_key = st.text_input('Enter your OpenAI API Key:', type='password')
    video_id = st.text_input('Enter the YouTube Video ID:')

    # Nothing to do until the user presses the button.
    if not st.button('Start'):
        return

    # Both inputs are required before any work starts.
    if not (openai_api_key and video_id):
        st.write('Please enter both your OpenAI API Key and YouTube Video ID.')
        return

    try:
        bar = st.progress(0)
        status = st.empty()

        status.text('Loading the transcript...')
        bar.progress(25)
        # The language code is passed on to the summarizer
        transcript, language_code = get_transcript(video_id)

        status.text('Creating summary...')
        bar.progress(75)
        summary = summarize_with_langchain_and_openai(
            transcript, language_code, openai_api_key
        )

        status.text('Summary:')
        st.markdown(summary)
        bar.progress(100)
    except Exception as err:
        st.write(str(err))
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|