Spaces:

Sayiqa
/

deployment

Sleeping

App Files Files Community

Sayiqa committed on Dec 30, 2024

Commit

7d41522

verified ·

1 Parent(s): 4e7cbd0

Update app.py

Browse files

Files changed (1) hide show

app.py +147 -146

app.py CHANGED Viewed

@@ -456,6 +456,7 @@ subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
 subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
 subprocess.check_call(["pip", "install", "genai"])
 subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
 import transformers
 import torch
 import os
@@ -675,128 +676,6 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
 import re
 from collections import Counter
 from googleapiclient.discovery import build
-def process_youtube_video(url="", keywords=""):
-    try:
-        # Initialize variables
-        thumbnail = None
-        summary = "No transcript available"
-        sentiment_label = "N/A"
-        recommendations = ""
-        subtitle_info = "No additional information available"
-        if not url.strip():
-            return None, "Please enter a YouTube URL", "N/A", "", ""
-        video_id = extract_video_id(url)
-        if not video_id:
-            return None, "Invalid YouTube URL", "N/A", "", ""
-        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
-        try:
-            # Fetch transcript
-            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-            transcript = None
-            try:
-                transcript = transcript_list.find_transcript(['en'])
-            except:
-                transcript = transcript_list.find_generated_transcript(['en'])
-            text = " ".join([t['text'] for t in transcript.fetch()])
-            if not text.strip():
-                raise ValueError("Transcript is empty")
-            # Generate summary
-            model = genai.GenerativeModel("gemini-pro")
-            summary = model.generate_content(f"Summarize this: {text[:4000]}").text
-            # Extract subtitle information
-            subtitle_info = extract_subtitle_info(text)
-            # Sentiment analysis
-            sentiment = TextBlob(text[:1000]).sentiment
-            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
-        except TranscriptsDisabled:
-            metadata = get_video_metadata(video_id)
-            summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
-            sentiment_label = "N/A"
-            subtitle_info = "No subtitles available for analysis."
-        except NoTranscriptFound:
-            metadata = get_video_metadata(video_id)
-            summary = metadata.get("description", "⚠️ No English transcript available.")
-            sentiment_label = "N/A"
-            subtitle_info = "No subtitles available for analysis."
-        except Exception as e:
-            return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
-        # Get recommendations
-        if keywords.strip():
-            recommendations = get_recommendations(keywords)
-        return thumbnail, summary, sentiment_label, subtitle_info, recommendations
-    except Exception as e:
-        return None, f"Error: {str(e)}", "N/A", "", ""
-def extract_video_id(url):
-    """
-    Extracts the video ID from a YouTube URL.
-    """
-    import re
-    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
-    return match.group(1) if match else None
-def get_video_metadata(video_id):
-    """
-    Fetches video metadata such as title and description using the YouTube Data API.
-    """
-    try:
-        from googleapiclient.discovery import build
-        # Replace with your YouTube Data API key
-        YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
-        youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
-        request = youtube.videos().list(part="snippet", id=video_id)
-        response = request.execute()
-        if "items" in response and len(response["items"]) > 0:
-            snippet = response["items"][0]["snippet"]
-            return {
-                "title": snippet.get("title", "No title available"),
-                "description": snippet.get("description", "No description available"),
-            }
-        return {}
-    except Exception as e:
-        return {"title": "Error fetching metadata", "description": str(e)}
-def extract_subtitle_info(text):
-    """
-    Extracts meaningful information from the subtitles.
-    This could include topics, key insights, or a breakdown of the content.
-    """
-    try:
-        # Split text into sentences for better analysis
-        sentences = text.split(". ")
-        # Example: Extract key topics or keywords
-        from collections import Counter
-        words = text.split()
-        common_words = Counter(words).most_common(10)
-        key_topics = ", ".join([word for word, count in common_words])
-        # Example: Provide a breakdown of the content
-        info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
-        return info
-    except Exception as e:
-        return f"Error extracting subtitle information: {str(e)}"
-##########
 # def process_youtube_video(url="", keywords=""):
 #     try:
 #         # Initialize variables
@@ -828,19 +707,16 @@ def extract_subtitle_info(text):
 #             if not text.strip():
 #                 raise ValueError("Transcript is empty")
-#             # Clean up the text for sentiment analysis
-#             cleaned_text = clean_text_for_analysis(text)
-#             # Sentiment analysis
-#             sentiment = TextBlob(cleaned_text).sentiment  # Use cleaned text for sentiment analysis
-#             sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
 #             # Generate summary
 #             model = genai.GenerativeModel("gemini-pro")
-#             summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
 #             # Extract subtitle information
-#             subtitle_info = extract_subtitle_info(cleaned_text)
 #         except TranscriptsDisabled:
 #             metadata = get_video_metadata(video_id)
@@ -869,6 +745,7 @@ def extract_subtitle_info(text):
 #     """
 #     Extracts the video ID from a YouTube URL.
 #     """
 #     match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
 #     return match.group(1) if match else None
@@ -878,7 +755,10 @@ def extract_subtitle_info(text):
 #     Fetches video metadata such as title and description using the YouTube Data API.
 #     """
 #     try:
-#         YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"  # Replace with your YouTube Data API key
 #         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
 #         request = youtube.videos().list(part="snippet", id=video_id)
 #         response = request.execute()
@@ -905,6 +785,7 @@ def extract_subtitle_info(text):
 #         sentences = text.split(". ")
 #         # Example: Extract key topics or keywords
 #         words = text.split()
 #         common_words = Counter(words).most_common(10)
 #         key_topics = ", ".join([word for word, count in common_words])
@@ -916,23 +797,143 @@ def extract_subtitle_info(text):
 #     except Exception as e:
 #         return f"Error extracting subtitle information: {str(e)}"
-# def clean_text_for_analysis(text):
-#     """
-#     Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
-#     """
-#     # Remove extra spaces and line breaks
-#     cleaned_text = " ".join(text.split())
-#     return cleaned_text
-# def get_recommendations(keywords):
-#     """
-#     Fetches related video recommendations based on the provided keywords.
-#     This function can be expanded with a proper API or custom logic.
-#     """
-#     # Placeholder for fetching recommendations based on keywords
-#     return f"Recommendations for: {keywords}"  # Dummy return for now

 subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
 subprocess.check_call(["pip", "install", "genai"])
 subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
+subprocess.check_call(["pip", "install", "google-api-python-client"])
 import transformers
 import torch
 import os
 import re
 from collections import Counter
 from googleapiclient.discovery import build
 # def process_youtube_video(url="", keywords=""):
 #     try:
 #         # Initialize variables
 #             if not text.strip():
 #                 raise ValueError("Transcript is empty")
 #             # Generate summary
 #             model = genai.GenerativeModel("gemini-pro")
+#             summary = model.generate_content(f"Summarize this: {text[:4000]}").text
 #             # Extract subtitle information
+#             subtitle_info = extract_subtitle_info(text)
+#             # Sentiment analysis
+#             sentiment = TextBlob(text[:1000]).sentiment
+#             sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
 #         except TranscriptsDisabled:
 #             metadata = get_video_metadata(video_id)
 #     """
 #     Extracts the video ID from a YouTube URL.
 #     """
+#     import re
 #     match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
 #     return match.group(1) if match else None
 #     Fetches video metadata such as title and description using the YouTube Data API.
 #     """
 #     try:
+#         from googleapiclient.discovery import build
+#         # Replace with your YouTube Data API key
+#         YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
 #         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
 #         request = youtube.videos().list(part="snippet", id=video_id)
 #         response = request.execute()
 #         sentences = text.split(". ")
 #         # Example: Extract key topics or keywords
+#         from collections import Counter
 #         words = text.split()
 #         common_words = Counter(words).most_common(10)
 #         key_topics = ", ".join([word for word, count in common_words])
 #     except Exception as e:
 #         return f"Error extracting subtitle information: {str(e)}"
+##########
+def process_youtube_video(url="", keywords=""):
+    """
+    Builds a thumbnail URL, summary, sentiment label, subtitle statistics and
+    recommendations for a YouTube video.
+
+    Args:
+        url: YouTube video URL; blank or unparseable input yields an error tuple.
+        keywords: Optional keywords passed to get_recommendations when non-blank.
+
+    Returns:
+        A 5-tuple (thumbnail, summary, sentiment_label, subtitle_info,
+        recommendations). On validation or processing errors the message is
+        placed in the summary slot, sentiment_label is "N/A" and the last two
+        slots are empty strings. This function does not raise; any unexpected
+        exception is reported through the summary slot instead.
+    """
+    try:
+        # Defaults used when a later step does not overwrite them
+        thumbnail = None
+        summary = "No transcript available"
+        sentiment_label = "N/A"
+        recommendations = ""
+        subtitle_info = "No additional information available"
+        if not url.strip():
+            return None, "Please enter a YouTube URL", "N/A", "", ""
+        video_id = extract_video_id(url)
+        if not video_id:
+            return None, "Invalid YouTube URL", "N/A", "", ""
+        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
+        try:
+            # Fetch transcript
+            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+            try:
+                transcript = transcript_list.find_transcript(['en'])
+            except NoTranscriptFound:
+                # Only a missing English transcript should trigger the fallback to
+                # the auto-generated one; any other failure must reach the handlers
+                # below instead of being silently swallowed by a bare except.
+                transcript = transcript_list.find_generated_transcript(['en'])
+            text = " ".join([t['text'] for t in transcript.fetch()])
+            if not text.strip():
+                raise ValueError("Transcript is empty")
+            # Clean up the text for sentiment analysis
+            cleaned_text = clean_text_for_analysis(text)
+            # Sentiment analysis on the cleaned text
+            polarity = TextBlob(cleaned_text).sentiment.polarity
+            if polarity > 0:
+                tone = "Positive"
+            elif polarity < 0:
+                tone = "Negative"
+            else:
+                tone = "Neutral"
+            sentiment_label = f"{tone} ({polarity:.2f})"
+            # Generate summary (only the first 4000 characters are sent to the model)
+            model = genai.GenerativeModel("gemini-pro")
+            summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
+            # Extract subtitle information
+            subtitle_info = extract_subtitle_info(cleaned_text)
+        except TranscriptsDisabled:
+            # No subtitles at all: fall back to the video description as the summary
+            metadata = get_video_metadata(video_id)
+            summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
+            sentiment_label = "N/A"
+            subtitle_info = "No subtitles available for analysis."
+        except NoTranscriptFound:
+            # Neither a manual nor a generated English transcript exists
+            metadata = get_video_metadata(video_id)
+            summary = metadata.get("description", "⚠️ No English transcript available.")
+            sentiment_label = "N/A"
+            subtitle_info = "No subtitles available for analysis."
+        except Exception as e:
+            return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
+        # Get recommendations
+        if keywords.strip():
+            recommendations = get_recommendations(keywords)
+        return thumbnail, summary, sentiment_label, subtitle_info, recommendations
+    except Exception as e:
+        return None, f"Error: {str(e)}", "N/A", "", ""
+def extract_video_id(url):
+    """
+    Returns the 11-character video ID found in a YouTube URL, or None when
+    the URL contains no such ID.
+    """
+    # The ID is the first run of 11 ID characters that directly follows "v=" or "/".
+    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
+    if found is None:
+        return None
+    return found.group(1)
+def get_video_metadata(video_id):
+    """
+    Fetches video metadata such as title and description using the YouTube Data API.
+
+    The API key is read from the YOUTUBE_API_KEY environment variable so that
+    no credential is stored in source control.
+
+    Args:
+        video_id: ID of the video to look up.
+
+    Returns:
+        A dict with "title" and "description" keys; an empty dict when the API
+        returns no items; or a dict whose title is "Error fetching metadata"
+        and whose description carries the error text when the key is missing
+        or the request fails. This function does not raise.
+    """
+    import os  # local import keeps this block independent of the file's import order
+    api_key = os.environ.get("YOUTUBE_API_KEY", "").strip()
+    if not api_key:
+        # Same error shape as the except branch below, so callers need no new handling
+        return {
+            "title": "Error fetching metadata",
+            "description": "YOUTUBE_API_KEY environment variable is not set.",
+        }
+    try:
+        youtube = build("youtube", "v3", developerKey=api_key)
+        request = youtube.videos().list(part="snippet", id=video_id)
+        response = request.execute()
+        if "items" in response and len(response["items"]) > 0:
+            snippet = response["items"][0]["snippet"]
+            return {
+                "title": snippet.get("title", "No title available"),
+                "description": snippet.get("description", "No description available"),
+            }
+        return {}
+    except Exception as e:
+        return {"title": "Error fetching metadata", "description": str(e)}
+def extract_subtitle_info(text):
+    """
+    Produces a short statistical overview of subtitle text: the ten most
+    frequent whitespace-separated words, the number of ". "-separated
+    sentences, and the total word count. Returns an error message string
+    instead of raising if anything goes wrong.
+    """
+    try:
+        # Sentences are approximated by splitting on a period followed by a space
+        sentence_count = len(text.split(". "))
+        tokens = text.split()
+        # Most frequent tokens stand in for the topics of the video
+        top_ten = Counter(tokens).most_common(10)
+        key_topics = ", ".join(token for token, _ in top_ten)
+        return f"Key topics discussed: {key_topics}. \nNumber of sentences: {sentence_count}. \nTotal words: {len(tokens)}."
+    except Exception as e:
+        return f"Error extracting subtitle information: {str(e)}"
+def clean_text_for_analysis(text):
+    """
+    Normalizes whitespace in the transcript text: every run of spaces, tabs
+    or line breaks becomes a single space and the ends are trimmed.
+    """
+    # split() with no argument drops all whitespace runs, including leading/trailing ones
+    tokens = text.split()
+    return " ".join(tokens)
+def get_recommendations(keywords):
+    """
+    Returns a recommendations string for the given keywords.
+    Currently a stub: it only echoes the keywords back in a fixed sentence
+    and performs no lookup.
+    """
+    # Dummy value until a real recommendation source is wired in
+    placeholder = f"Recommendations for: {keywords}"
+    return placeholder