Spaces:

Sayiqa7
/

Youtube_summarization

Runtime error

App Files Files Community

Sayiqa7 committed on Dec 31, 2024

Commit

dbac2c6

verified ·

1 Parent(s): fc7aa4a

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -0

app.py CHANGED Viewed

@@ -110,6 +110,78 @@ courses_data = [
 ]
 def get_recommendations(keywords, max_results=5):
     if not keywords:
         return "Please provide search keywords"
@@ -136,6 +208,55 @@ def get_recommendations(keywords, max_results=5):
     except Exception as e:
         return f"Error: {str(e)}"
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as app:

 ]
def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in *url*, or None.

    Recognised forms (the scheme is optional):

    * ``youtube.com/watch?v=ID`` (the ``v`` parameter may appear anywhere
      in the query string)
    * ``youtu.be/ID``
    * ``youtube.com/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``,
      ``/e/ID``

    on ``youtube.com``, any of its subdomains, and ``youtube-nocookie.com``.

    The URL is parsed structurally instead of being regex-searched: a
    pattern that accepts "any slash followed by 11 id-like characters"
    also matches channel URLs and other non-video paths and returns a
    bogus ID for them.

    Args:
        url: The URL text supplied by the user. Non-string or empty
            values are treated as invalid.

    Returns:
        The video ID string, or None when *url* is not a recognised
        YouTube video URL.
    """
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    # urlparse only populates the host when a scheme is present, so
    # schemeless input such as "youtu.be/ID" gets a default one.
    if not re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", candidate):
        candidate = "https://" + candidate.lstrip("/")

    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Raised for malformed authorities (e.g. unbalanced IPv6 brackets).
        return None

    if host == "youtu.be":
        # Short links carry the ID as the first path component.
        video_id = parsed.path.lstrip("/").split("/", 1)[0]
    elif host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    ):
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        else:
            parts = [part for part in parsed.path.split("/") if part]
            is_video_path = len(parts) >= 2 and parts[0] in (
                "embed",
                "shorts",
                "live",
                "v",
                "e",
            )
            video_id = parts[1] if is_video_path else ""
    else:
        return None

    # Require exactly 11 id characters so longer tokens are rejected
    # rather than silently truncated.
    if re.fullmatch(r"[0-9A-Za-z_-]{11}", video_id):
        return video_id
    return None
def get_video_metadata(video_id):
    """Look up basic snippet metadata for a video via the YouTube Data API.

    Args:
        video_id: The ID passed as the ``id`` filter of ``videos().list``.

    Returns:
        A dict with ``title``, ``description``, ``publishedAt`` and
        ``channelTitle`` (each with a placeholder when the snippet lacks
        the field); an empty dict when the response has no items; or a
        dict holding an error ``title`` and the exception text as
        ``description`` when anything raises.
    """
    try:
        client = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
        response = client.videos().list(
            part="snippet,contentDetails", id=video_id
        ).execute()

        # Nothing returned for this ID: report "no metadata" with an empty dict.
        if "items" not in response or len(response["items"]) == 0:
            return {}

        snippet = response["items"][0]["snippet"]
        field_defaults = (
            ("title", "No title available"),
            ("description", "No description available"),
            ("publishedAt", "Unknown"),
            ("channelTitle", "Unknown"),
        )
        return {
            field: snippet.get(field, default)
            for field, default in field_defaults
        }
    except Exception as e:
        # Best-effort lookup: surface the failure as displayable metadata.
        return {"title": "Error fetching metadata", "description": str(e)}
def clean_text_for_analysis(text):
    """Collapse each run of whitespace in *text* to one space and trim the ends."""
    words = text.split()
    return " ".join(words)
def segment_transcript(transcript):
    """Split a transcript into four positional sections.

    The split is a rough positional heuristic, not content analysis:
    the first 10% of segments form the introduction, the next 40% the
    key points, the next 40% the main body, and the last 10% the
    conclusion.

    Only each segment's ``"text"`` entry is read. Timing fields such as
    ``"start"`` are not needed, so segments without them are accepted.

    Args:
        transcript: A sized sequence of mappings, each with a ``"text"``
            key.

    Returns:
        A dict with keys ``introduction``, ``key_points``, ``main_body``
        and ``conclusion``; each value is that section's segment texts
        joined with single spaces (an empty string for an empty section).
    """
    sections = {
        "introduction": [],
        "key_points": [],
        "main_body": [],
        "conclusion": [],
    }
    total_segments = len(transcript)

    for idx, segment in enumerate(transcript):
        if idx < total_segments * 0.1:  # first 10%
            bucket = "introduction"
        elif idx < total_segments * 0.5:  # next 40%
            bucket = "key_points"
        elif idx < total_segments * 0.9:  # next 40%
            bucket = "main_body"
        else:  # last 10%
            bucket = "conclusion"
        sections[bucket].append(segment["text"])

    return {name: " ".join(texts) for name, texts in sections.items()}
def generate_summary(segments, max_chars=400):
    """Format the four transcript sections as a Markdown summary.

    Each section is rendered as a bold heading followed by at most
    *max_chars* characters of its text. An ellipsis is appended only
    when the text was actually cut, so short or empty sections are not
    shown as truncated.

    Args:
        segments: Mapping with ``introduction``, ``key_points``,
            ``main_body`` and ``conclusion`` string entries.
        max_chars: Maximum number of characters kept per section.

    Returns:
        The summary string; sections are separated by a blank line and
        the text ends with a single newline.

    Raises:
        KeyError: If one of the four section keys is missing.
    """

    def _excerpt(text):
        # Mark truncation only when something was dropped.
        if len(text) > max_chars:
            return text[:max_chars] + "..."
        return text

    headings = (
        ("Introduction", "introduction"),
        ("Key Points", "key_points"),
        ("Main Body", "main_body"),
        ("Conclusion", "conclusion"),
    )
    blocks = [
        f"**{heading}:**\n{_excerpt(segments[key])}"
        for heading, key in headings
    ]
    return "\n\n".join(blocks) + "\n"
def analyze_sentiment(text):
    """Label *text* by the sign of its TextBlob polarity score.

    Returns:
        A string of the form ``"<label> (<polarity>)"`` where the label
        is ``Positive`` for a polarity above zero, ``Negative`` below
        zero and ``Neutral`` at exactly zero, and the polarity is
        formatted to two decimal places.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        label = "Positive"
    elif polarity < 0:
        label = "Negative"
    else:
        label = "Neutral"
    return f"{label} ({polarity:.2f})"
 def get_recommendations(keywords, max_results=5):
     if not keywords:
         return "Please provide search keywords"
     except Exception as e:
         return f"Error: {str(e)}"
def process_youtube_video(url):
    """Fetch metadata, a transcript summary and a sentiment label for a video.

    Every return path yields the same 5-tuple so callers can always
    unpack ``thumbnail, title, description, summary, sentiment``.

    Args:
        url: The YouTube URL to process.

    Returns:
        ``(thumbnail, title, description, summary, sentiment_label)``.

        * Invalid URL: ``(None, "Invalid YouTube URL", "N/A", "N/A", "N/A")``.
        * Transcripts disabled or not found: metadata is returned with
          ``summary`` set to ``"No transcript available."`` and the
          sentiment left as ``"N/A"``.
        * Any other failure: ``(None, "Error: <message>", "N/A", "N/A", "N/A")``.
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            # Must have the same arity as the other return paths;
            # a shorter tuple breaks callers that unpack five values.
            return None, "Invalid YouTube URL", "N/A", "N/A", "N/A"

        metadata = get_video_metadata(video_id)
        title = metadata.get("title", "No title")
        description = metadata.get("description", "No description available")
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        summary = ""
        sentiment_label = "N/A"
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except Exception:
                # Best-effort fallback to an auto-generated transcript.
                # `Exception` (not a bare except) keeps KeyboardInterrupt
                # and SystemExit propagating.
                transcript = transcript_list.find_generated_transcript(['en'])

            text_segments = transcript.fetch()
            transcript_text = " ".join(
                segment['text'] for segment in text_segments
            )
            cleaned_text = clean_text_for_analysis(transcript_text)
            segmented = segment_transcript(text_segments)
            summary = generate_summary(segmented)
            sentiment_label = analyze_sentiment(cleaned_text)
        except (TranscriptsDisabled, NoTranscriptFound):
            summary = "No transcript available."

        return thumbnail, title, description, summary, sentiment_label
    except Exception as e:
        # Top-level boundary: report the failure instead of raising.
        return None, f"Error: {str(e)}", "N/A", "N/A", "N/A"
# Smoke test: process one sample video and print every returned field.
# NOTE(review): these statements sit at module level, so they run each
# time the module is loaded — confirm that is intended for the deployed app.
url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
thumbnail, title, description, summary, sentiment = process_youtube_video(url)
print(
    f"Thumbnail: {thumbnail}\n",
    f"Title: {title}\n",
    f"Description:\n{description}\n",
    f"Summary:\n{summary}\n",
    f"Sentiment: {sentiment}",
    sep="\n",
)
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as app: