File size: 3,419 Bytes
64284f9
ae477d3
64284f9
ae477d3
 
014002f
ae477d3
64284f9
ae477d3
64284f9
ae477d3
 
 
 
 
 
 
 
 
 
 
64284f9
ae477d3
 
 
014002f
 
 
ae477d3
 
 
 
014002f
64284f9
ae477d3
 
 
 
64284f9
ae477d3
 
 
 
64284f9
ae477d3
64284f9
ae477d3
 
 
 
64284f9
014002f
ae477d3
64284f9
ae477d3
014002f
 
 
 
 
ae477d3
014002f
ae477d3
 
 
64284f9
ae477d3
 
64284f9
ae477d3
014002f
64284f9
ae477d3
 
64284f9
014002f
64284f9
ae477d3
 
 
 
 
 
014002f
64284f9
ae477d3
014002f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import openai
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter

def get_transcript(video_id):
    """Fetch a video's transcript text and the language code of the transcript used.

    A manually created transcript is preferred; when the video has none, the
    first auto-generated transcript in the listing is used instead.

    Args:
        video_id: YouTube video ID passed to
            ``YouTubeTranscriptApi.list_transcripts``.

    Returns:
        A ``(full_transcript, language_code)`` tuple: the ``'text'`` of every
        fetched part joined with single spaces, and the ``language_code`` of
        the transcript that was chosen.

    Raises:
        Exception: With the message "No suitable transcript found." when the
            listing holds neither a manual nor an auto-generated transcript.
            Errors raised while listing or fetching propagate unchanged.
    """
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

    # find_manually_created_transcript() requires an explicit list of language
    # codes, so calling it with no arguments can never succeed. Scanning the
    # listing instead accepts a manual transcript in whatever language the
    # video offers, and avoids a catch-all except that would hide real errors.
    available = list(transcript_list)

    transcript = next((item for item in available if not item.is_generated), None)
    if transcript is None:
        # No manual transcript: fall back to the first auto-generated one.
        transcript = next((item for item in available if item.is_generated), None)
    if transcript is None:
        raise Exception("No suitable transcript found.")

    language_code = transcript.language_code  # Language of the chosen transcript

    full_transcript = " ".join(part['text'] for part in transcript.fetch())
    return full_transcript, language_code

def summarize_with_langchain_and_openai(transcript, language_code, openai_api_key, model_name='gpt-3.5-turbo'):
    """Summarize the beginning of a transcript with an OpenAI chat model.

    The transcript is split into chunks, the first four chunks are re-joined
    with single spaces, and that text is sent in one chat-completion request
    asking for a titled summary written in ``language_code``.

    Args:
        transcript: Transcript text to summarize.
        language_code: Language identifier interpolated into the prompt.
        openai_api_key: Key assigned to ``openai.api_key`` before the request.
        model_name: Chat model name passed to the completion call.

    Returns:
        The message content of the first choice in the response.
    """
    # The key is assigned on the openai module itself before any request
    openai.api_key = openai_api_key

    # Chunk the transcript and keep only the leading chunks so the prompt stays bounded
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
    leading_chunks = splitter.split_text(transcript)[:4]  # Adjust this as needed
    text_to_summarize = " ".join(leading_chunks)

    # Build the user prompt piece by piece; whitespace (including the trailing
    # space after the title instruction) is part of the text sent to the model
    user_prompt = (
        f'Summarize the following text in {language_code}.\n'
        f'    Text: {text_to_summarize}\n'
        '\n'
        f'    Add a title to the summary in {language_code}. \n'
        f'    Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'
    )

    conversation = [
        {'role': 'system', 'content': 'I want you to act as a Life Coach that can create good summaries!'},
        {'role': 'user', 'content': user_prompt},
    ]

    # Ask OpenAI for the summary
    completion = openai.ChatCompletion.create(
        model=model_name,
        messages=conversation,
        temperature=1,
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def main():
    """Render the Streamlit page and run the summarization when requested.

    Shows inputs for the OpenAI API key and the YouTube video ID. When the
    'Start' button is pressed with both fields filled in, the transcript is
    fetched and summarized while a progress bar and status line are updated;
    any exception raised along the way is written to the page as text.
    """
    st.title('YouTube Video Summarizer')

    openai_api_key = st.text_input('Enter your OpenAI API Key:', type='password')
    video_id = st.text_input('Enter the YouTube Video ID:')

    # Nothing more to do until the button is pressed
    if not st.button('Start'):
        return

    # Both fields are required before any work starts
    if not (openai_api_key and video_id):
        st.write('Please enter both your OpenAI API Key and YouTube Video ID.')
        return

    try:
        progress_bar = st.progress(0)
        status_line = st.empty()

        status_line.text('Loading the transcript...')
        progress_bar.progress(25)

        # The transcript comes back together with its language code
        transcript, language_code = get_transcript(video_id)

        status_line.text('Creating summary...')
        progress_bar.progress(75)

        summary = summarize_with_langchain_and_openai(transcript, language_code, openai_api_key)

        status_line.text('Summary:')
        st.markdown(summary)
        progress_bar.progress(100)
    except Exception as err:
        # Show the failure message on the page
        st.write(str(err))

# Build the page only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()