Spaces:

Sayiqa7
/

Youtube_summarization

Runtime error

App Files Community

Sayiqa7 committed on Dec 31, 2024

Commit

a274f10

verified ·

1 Parent(s): dbac2c6

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -83

app.py CHANGED Viewed

@@ -33,7 +33,6 @@ import re
 from googleapiclient.discovery import build
 from huggingface_hub import login
 from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
-from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled
 def install_missing_packages():
     required_packages = {
          "torch":">=1.11.0",
@@ -109,17 +108,14 @@ courses_data = [
     (5, "Mathematics", "Ms. Smith", "Intermediate")
 ]
 def extract_video_id(url):
-    # Improved regex to handle various YouTube URL formats
-    match = re.search(r"(?:v=|\/|be\/|embed\/|watch\?v=)([0-9A-Za-z_-]{11})", url)
     return match.group(1) if match else None
 def get_video_metadata(video_id):
     try:
         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
-        request = youtube.videos().list(part="snippet,contentDetails", id=video_id)
         response = request.execute()
         if "items" in response and len(response["items"]) > 0:
@@ -127,8 +123,6 @@ def get_video_metadata(video_id):
             return {
                 "title": snippet.get("title", "No title available"),
                 "description": snippet.get("description", "No description available"),
-                "publishedAt": snippet.get("publishedAt", "Unknown"),
-                "channelTitle": snippet.get("channelTitle", "Unknown"),
             }
         return {}
@@ -138,50 +132,6 @@ def get_video_metadata(video_id):
 def clean_text_for_analysis(text):
     return " ".join(text.split())
-def segment_transcript(transcript):
-    # Segment the transcript into introduction, key points, main body, and conclusion
-    intro, key_points, main_body, conclusion = [], [], [], []
-    total_segments = len(transcript)
-    for idx, segment in enumerate(transcript):
-        start_time = segment['start']
-        text = segment['text']
-        # Use rough heuristics to segment transcript
-        if idx < total_segments * 0.1:  # First 10% as introduction
-            intro.append(text)
-        elif idx < total_segments * 0.5:  # Next 40% as key points
-            key_points.append(text)
-        elif idx < total_segments * 0.9:  # Next 40% as main body
-            main_body.append(text)
-        else:  # Last 10% as conclusion
-            conclusion.append(text)
-    return {
-        "introduction": " ".join(intro),
-        "key_points": " ".join(key_points),
-        "main_body": " ".join(main_body),
-        "conclusion": " ".join(conclusion),
-    }
-def generate_summary(segments):
-    # Generate a formatted summary
-    return (
-        "**Introduction:**\n" + segments["introduction"][:400] + "...\n\n" +
-        "**Key Points:**\n" + segments["key_points"][:400] + "...\n\n" +
-        "**Main Body:**\n" + segments["main_body"][:400] + "...\n\n" +
-        "**Conclusion:**\n" + segments["conclusion"][:400] + "...\n"
-    )
-def analyze_sentiment(text):
-    sentiment = TextBlob(text).sentiment
-    sentiment_label = (
-        "Positive" if sentiment.polarity > 0 else
-        "Negative" if sentiment.polarity < 0 else
-        "Neutral"
-    )
-    return f"{sentiment_label} ({sentiment.polarity:.2f})"
 def get_recommendations(keywords, max_results=5):
     if not keywords:
         return "Please provide search keywords"
@@ -190,9 +140,10 @@ def get_recommendations(keywords, max_results=5):
             "https://www.googleapis.com/youtube/v3/search",
             params={
                 "part": "snippet",
-                "q": keywords,
                 "type": "video",
                 "maxResults": max_results,
                 "key": YOUTUBE_API_KEY
             }
         ).json()
@@ -210,17 +161,15 @@ def get_recommendations(keywords, max_results=5):
 def process_youtube_video(url):
     try:
         video_id = extract_video_id(url)
         if not video_id:
             return None, "Invalid YouTube URL", "N/A"
-        metadata = get_video_metadata(video_id)
-        title = metadata.get("title", "No title")
-        description = metadata.get("description", "No description available")
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
-        summary = ""
-        sentiment_label = "N/A"
         try:
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -230,33 +179,26 @@ def process_youtube_video(url):
             except:
                 transcript = transcript_list.find_generated_transcript(['en'])
-            text_segments = transcript.fetch()
-            transcript_text = " ".join([segment['text'] for segment in text_segments])
-            cleaned_text = clean_text_for_analysis(transcript_text)
-            segmented = segment_transcript(text_segments)
-            summary = generate_summary(segmented)
-            sentiment_label = analyze_sentiment(cleaned_text)
-        except (TranscriptsDisabled, NoTranscriptFound):
-            summary = "No transcript available."
-        return thumbnail, title, description, summary, sentiment_label
-    except Exception as e:
-        return None, f"Error: {str(e)}", "N/A", "N/A", "N/A"
-url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
-thumbnail, title, description, summary, sentiment = process_youtube_video(url)
-print(f"Thumbnail: {thumbnail}\n")
-print(f"Title: {title}\n")
-print(f"Description:\n{description}\n")
-print(f"Summary:\n{summary}\n")
-print(f"Sentiment: {sentiment}")
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as app:
@@ -409,6 +351,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
 if __name__ == "__main__":
     app.launch()

 from googleapiclient.discovery import build
 from huggingface_hub import login
 from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 def install_missing_packages():
     required_packages = {
          "torch":">=1.11.0",
     (5, "Mathematics", "Ms. Smith", "Intermediate")
 ]
 def extract_video_id(url):
+    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
     return match.group(1) if match else None
 def get_video_metadata(video_id):
     try:
         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
+        request = youtube.videos().list(part="snippet", id=video_id)
         response = request.execute()
         if "items" in response and len(response["items"]) > 0:
             return {
                 "title": snippet.get("title", "No title available"),
                 "description": snippet.get("description", "No description available"),
             }
         return {}
 def clean_text_for_analysis(text):
     return " ".join(text.split())
 def get_recommendations(keywords, max_results=5):
     if not keywords:
         return "Please provide search keywords"
             "https://www.googleapis.com/youtube/v3/search",
             params={
                 "part": "snippet",
+                "q": f"educational {keywords}",
                 "type": "video",
                 "maxResults": max_results,
+                "relevanceLanguage": "en",
                 "key": YOUTUBE_API_KEY
             }
         ).json()
 def process_youtube_video(url):
     try:
+        thumbnail = None
+        summary = "No transcript available"
+        sentiment_label = "N/A"
         video_id = extract_video_id(url)
         if not video_id:
             return None, "Invalid YouTube URL", "N/A"
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
         try:
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
             except:
                 transcript = transcript_list.find_generated_transcript(['en'])
+            text = " ".join([t['text'] for t in transcript.fetch()])
+            if not text.strip():
+                raise ValueError("Transcript is empty")
+            cleaned_text = clean_text_for_analysis(text)
+            sentiment = TextBlob(cleaned_text).sentiment
+            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
+            summary = f"Summary: {cleaned_text[:400]}..."
+        except (TranscriptsDisabled, NoTranscriptFound):
+            metadata = get_video_metadata(video_id)
+            summary = metadata.get("description", "No subtitles available")
+            sentiment_label = "N/A"
+        return thumbnail, summary, sentiment_label
+    except Exception as e:
+        return None, f"Error: {str(e)}", "N/A"
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as app:
 if __name__ == "__main__":
     app.launch()