Spaces:

dioarafl
/

snapSync

Runtime error

App Files Files Community

dioarafl committed on May 13, 2024

Commit

bcb6910

verified ·

1 Parent(s): 8918d97

Create app.py

Browse files

Files changed (1) hide show

app.py +94 -0

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+from urllib.parse import parse_qs, urlparse
+
+import requests
+import streamlit as st
+from youtube_transcript_api import YouTubeTranscriptApi
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from dotenv import load_dotenv
+# Load environment variables from the .env file in the project directory
+load_dotenv()
+# Access environment variables
+API_URL = os.getenv('HUGGING_FACE_API_URL')
+API_KEY = os.getenv('HUGGING_FACE_API_KEY')
+def get_transcript(youtube_url):
+    video_id = youtube_url.split("v=")[-1]
+    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+    try:
+        # Try to fetch the manual transcript
+        transcript = transcript_list.find_manually_created_transcript()
+        language_code = transcript.language_code  # Save the detected language
+    except:
+        try:
+            # If no manual transcript is found, try fetching an auto-generated transcript in a supported language
+            generated_transcripts = [trans for trans in transcript_list if trans.is_generated]
+            transcript = generated_transcripts[0]
+            language_code = transcript.language_code  # Save the detected language
+        except:
+            # If no auto-generated transcript is found, raise an exception
+            raise Exception("No suitable transcript found.")
+    full_transcript = " ".join([part['text'] for part in transcript.fetch()])
+    return full_transcript, language_code  # Return both the transcript and detected language
+def summarize_with_hugging_face(transcript, language_code, model_name='meta-llama/Meta-Llama-3-8B'):
+    # Split the document if it's too long
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
+    texts = text_splitter.split_text(transcript)
+    text_to_summarize = " ".join(texts[:4])  # Adjust this as needed
+    # Prepare the payload for summarization
+    payload = {
+        "inputs": {
+            "prompt": f'''Summarize the following text in {language_code}.
+            Text: {text_to_summarize}
+            Add a title to the summary in {language_code}.
+            Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'''
+        }
+    }
+    # Start summarizing using Hugging Face
+    headers = {"Authorization": f"Bearer {API_KEY}"}
+    response = requests.post(API_URL, headers=headers, json=payload)
+    if response.status_code == 200:
+        return response.json()["generated_text"]
+    else:
+        raise Exception("Summarization failed.")
+def main():
+    st.title('YouTube Video Summarizer')
+    link = st.text_input('Enter the link of the YouTube video you want to summarize:')
+    if st.button('Start'):
+        if link:
+            try:
+                progress = st.progress(0)
+                status_text = st.empty()
+                status_text.text('Loading the transcript...')
+                progress.progress(25)
+                # Get both the transcript and language_code
+                transcript, language_code = get_transcript(link)
+                status_text.text(f'Creating summary...')
+                progress.progress(75)
+                summary = summarize_with_hugging_face(transcript, language_code)
+                status_text.text('Summary:')
+                st.markdown(summary)
+                progress.progress(100)
+            except Exception as e:
+                st.write(str(e))
+        else:
+            st.write('Please enter a valid YouTube link.')
+# Script entry point: call main() only when this file is run directly, not when it is imported
+if __name__ == "__main__":
+    main()