Spaces:

Abu1998
/

YouTubeTranscriptApi1

Sleeping

App Files Files Community

Abu1998 committed on Aug 10, 2024

Commit

ae477d3

verified ·

1 Parent(s): 048df7e

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -62

app.py CHANGED Viewed

@@ -1,80 +1,93 @@
-import streamlit as st
 import openai
 from youtube_transcript_api import YouTubeTranscriptApi
-# Function to get YouTube transcript
-def get_transcript(video_id):
-    """Fetches the transcript for a given YouTube video ID."""
     try:
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        transcript = " ".join([entry['text'] for entry in transcript_list])
-        return transcript
-    except Exception as e:
-        st.error(f"Error fetching transcript: {e}")
-        return None
-# Function to summarize the transcript using OpenAI's GPT API
-def summarize_transcript(transcript):
-    """Summarizes the given transcript using OpenAI's GPT API."""
-    if transcript is None:
-        return None
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful AI assistant that summarizes YouTube videos."},
-            {"role": "user", "content": f"Please provide a concise summary of the following YouTube video transcript:\n\n{transcript}"}
-        ]
-    )
-    summary = response.choices[0].message["content"].strip()
-    return summary
-# Function to generate a 1-minute YouTube Shorts script based on the summary
-def generate_shorts_script(summary):
-    """Generates a 1-minute shorts script based on the summary."""
-    if summary is None:
-        return None
     response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
         messages=[
-            {"role": "system", "content": "You are a creative script writer for YouTube Shorts."},
-            {"role": "user", "content": f"Write a captivating and engaging 1-minute script for a YouTube Short based on this summary: {summary}. Focus on impactful visuals and concise storytelling."}
-        ]
     )
-    script = response.choices[0].message["content"].strip()
-    return script
-# Streamlit UI
-st.title("YouTube Video to Shorts Script Converter")
-# Input fields
-openai_api_key = st.text_input("OpenAI API Key", type="password")
-video_id = st.text_input("YouTube Video ID")
-if st.button("Generate Shorts Script"):
-    if openai_api_key and video_id:
-        try:
-            # Set OpenAI API key
-            openai.api_key = openai_api_key
-            # Get transcript
-            transcript = get_transcript(video_id)
-            # Summarize the transcript
-            summary = summarize_transcript(transcript)
-            # Generate shorts script
-            if summary:
-                shorts_script = generate_shorts_script(summary)
-                # Display the generated shorts script
-                if shorts_script:
-                    st.write("Generated YouTube Shorts Script:")
-                    st.write(shorts_script)
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
-    else:
-        st.error("Please fill in both the OpenAI API Key and YouTube Video ID.")

+import os
 import openai
+import streamlit as st
 from youtube_transcript_api import YouTubeTranscriptApi
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from dotenv import load_dotenv, find_dotenv
# Location of the .env file holding OPENAI_API_KEY.
env_path = '/home/USER/.env/openai_api' # Change the Path
# Load the OpenAI API key from the .env file. load_dotenv() returns False when
# it set no variables (for example because the placeholder path above was never
# adjusted), so fall back to searching for a .env file near the application.
# Variables that are already present in the environment are not overridden.
if not load_dotenv(env_path):
    load_dotenv(find_dotenv())
openai.api_key = os.getenv('OPENAI_API_KEY')
def _extract_video_id(youtube_url):
    """Return the video ID found in a YouTube link, or in a bare video ID.

    The ID is taken from the ``v`` query parameter when present (regular
    ``watch?v=...`` links, with any extra parameters ignored); otherwise the
    last path segment is used, which covers ``youtu.be/<id>``,
    ``/shorts/<id>``, ``/embed/<id>`` and a bare ID.

    Raises:
        ValueError: if the input contains neither a ``v`` parameter nor a
            path segment.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(youtube_url.strip())
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]
    segments = [segment for segment in parsed.path.split("/") if segment]
    if not segments:
        raise ValueError(f"Could not find a video ID in {youtube_url!r}.")
    return segments[-1]


def _pick_transcript(transcripts):
    """Return the first manually created transcript, else the first generated one.

    Raises:
        LookupError: if ``transcripts`` yields nothing.
    """
    fallback = None
    for candidate in transcripts:
        if not candidate.is_generated:
            return candidate
        if fallback is None:
            fallback = candidate
    if fallback is None:
        raise LookupError("No suitable transcript found.")
    return fallback


def get_transcript(youtube_url):
    """Fetch the transcript text of a YouTube video and its language code.

    Args:
        youtube_url: A YouTube link (watch, youtu.be, shorts, embed) or a
            bare video ID.

    Returns:
        A ``(full_transcript, language_code)`` tuple: the transcript parts
        joined with single spaces, and the language code of the transcript
        that was selected.

    Raises:
        ValueError: if no video ID can be extracted from ``youtube_url``.
        LookupError: if the video lists no transcript at all.
        Any exception raised by ``YouTubeTranscriptApi`` itself is propagated.
    """
    video_id = _extract_video_id(youtube_url)
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    # Select by the is_generated flag instead of calling
    # find_manually_created_transcript(): that method needs a list of language
    # codes, and this function is meant to work for whatever language the
    # video happens to provide.
    transcript = _pick_transcript(transcript_list)
    full_transcript = " ".join(part['text'] for part in transcript.fetch())
    return full_transcript, transcript.language_code
def summarize_with_langchain_and_openai(transcript, language_code, model_name='gpt-3.5-turbo',
                                        *, chunk_size=2000, max_chunks=4):
    """Summarize a transcript with an OpenAI chat model.

    The transcript is split into chunks with ``RecursiveCharacterTextSplitter``
    and only the first ``max_chunks`` chunks are sent to the model, so long
    transcripts are truncated rather than summarized in full.

    Args:
        transcript: The transcript text to summarize.
        language_code: Language code inserted into the prompt so the model
            answers in that language.
        model_name: Name of the OpenAI chat model to call.
        chunk_size: Maximum chunk size passed to the text splitter.
        max_chunks: Number of leading chunks included in the prompt.

    Returns:
        The content of the first choice in the model's response.

    Raises:
        ValueError: if ``transcript`` is empty or whitespace only, or if
            ``chunk_size`` or ``max_chunks`` is not positive.
    """
    # Fail before any API call: summarizing nothing only costs a request.
    if not transcript or not transcript.strip():
        raise ValueError("Transcript is empty; nothing to summarize.")
    if chunk_size <= 0 or max_chunks <= 0:
        raise ValueError("chunk_size and max_chunks must be positive.")

    # Split the document if it's too long
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    texts = text_splitter.split_text(transcript)
    text_to_summarize = " ".join(texts[:max_chunks])

    # Prepare the prompt for summarization
    system_prompt = 'I want you to act as a Life Coach that can create good summaries!'
    prompt = f'''Summarize the following text in {language_code}.
    Text: {text_to_summarize}
    Add a title to the summary in {language_code}.
    Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'''

    # Start summarizing using OpenAI
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt},
        ],
        temperature=1,
    )
    return response['choices'][0]['message']['content']
def main():
    """Render the Streamlit page and run the summarization when requested.

    Shows a text input for the video link and a 'Start' button. On click, the
    transcript is fetched with ``get_transcript`` and summarized with
    ``summarize_with_langchain_and_openai``; progress is reported through a
    progress bar and a status line. Any exception raised along the way is
    shown to the user as an error message instead of propagating.
    """
    st.title('YouTube video summarizer')
    link = st.text_input('Enter the link of the YouTube video you want to summarize:')
    if not st.button('Start'):
        return

    # Treat a whitespace-only entry the same as an empty one.
    link = link.strip()
    if not link:
        st.write('Please enter a valid YouTube link.')
        return

    progress = st.progress(0)
    status_text = st.empty()
    try:
        status_text.text('Loading the transcript...')
        progress.progress(25)
        # Getting both the transcript and language_code
        transcript, language_code = get_transcript(link)
        status_text.text('Creating summary...')
        progress.progress(75)
        model_name = 'gpt-3.5-turbo'
        summary = summarize_with_langchain_and_openai(transcript, language_code, model_name)
        status_text.text('Summary:')
        st.markdown(summary)
        progress.progress(100)
    except Exception as e:
        # Remove the stale status line and progress bar so that only the
        # error remains visible, and render it as an error, not plain text.
        status_text.empty()
        progress.empty()
        st.error(str(e))


if __name__ == "__main__":
    main()