Sayiqa committed on
Commit
7d41522
·
verified ·
1 Parent(s): 4e7cbd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -146
app.py CHANGED
@@ -456,6 +456,7 @@ subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
456
  subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
457
  subprocess.check_call(["pip", "install", "genai"])
458
  subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
 
459
  import transformers
460
  import torch
461
  import os
@@ -675,128 +676,6 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
675
  import re
676
  from collections import Counter
677
  from googleapiclient.discovery import build
678
- def process_youtube_video(url="", keywords=""):
679
- try:
680
- # Initialize variables
681
- thumbnail = None
682
- summary = "No transcript available"
683
- sentiment_label = "N/A"
684
- recommendations = ""
685
- subtitle_info = "No additional information available"
686
-
687
- if not url.strip():
688
- return None, "Please enter a YouTube URL", "N/A", "", ""
689
-
690
- video_id = extract_video_id(url)
691
- if not video_id:
692
- return None, "Invalid YouTube URL", "N/A", "", ""
693
-
694
- thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
695
-
696
- try:
697
- # Fetch transcript
698
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
699
- transcript = None
700
- try:
701
- transcript = transcript_list.find_transcript(['en'])
702
- except:
703
- transcript = transcript_list.find_generated_transcript(['en'])
704
-
705
- text = " ".join([t['text'] for t in transcript.fetch()])
706
- if not text.strip():
707
- raise ValueError("Transcript is empty")
708
-
709
- # Generate summary
710
- model = genai.GenerativeModel("gemini-pro")
711
- summary = model.generate_content(f"Summarize this: {text[:4000]}").text
712
-
713
- # Extract subtitle information
714
- subtitle_info = extract_subtitle_info(text)
715
-
716
- # Sentiment analysis
717
- sentiment = TextBlob(text[:1000]).sentiment
718
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
719
-
720
- except TranscriptsDisabled:
721
- metadata = get_video_metadata(video_id)
722
- summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
723
- sentiment_label = "N/A"
724
- subtitle_info = "No subtitles available for analysis."
725
- except NoTranscriptFound:
726
- metadata = get_video_metadata(video_id)
727
- summary = metadata.get("description", "⚠️ No English transcript available.")
728
- sentiment_label = "N/A"
729
- subtitle_info = "No subtitles available for analysis."
730
- except Exception as e:
731
- return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
732
-
733
- # Get recommendations
734
- if keywords.strip():
735
- recommendations = get_recommendations(keywords)
736
-
737
- return thumbnail, summary, sentiment_label, subtitle_info, recommendations
738
-
739
- except Exception as e:
740
- return None, f"Error: {str(e)}", "N/A", "", ""
741
-
742
-
743
- def extract_video_id(url):
744
- """
745
- Extracts the video ID from a YouTube URL.
746
- """
747
- import re
748
- match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
749
- return match.group(1) if match else None
750
-
751
-
752
- def get_video_metadata(video_id):
753
- """
754
- Fetches video metadata such as title and description using the YouTube Data API.
755
- """
756
- try:
757
- from googleapiclient.discovery import build
758
-
759
- # Replace with your YouTube Data API key
760
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
761
- youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
762
- request = youtube.videos().list(part="snippet", id=video_id)
763
- response = request.execute()
764
-
765
- if "items" in response and len(response["items"]) > 0:
766
- snippet = response["items"][0]["snippet"]
767
- return {
768
- "title": snippet.get("title", "No title available"),
769
- "description": snippet.get("description", "No description available"),
770
- }
771
- return {}
772
-
773
- except Exception as e:
774
- return {"title": "Error fetching metadata", "description": str(e)}
775
-
776
-
777
- def extract_subtitle_info(text):
778
- """
779
- Extracts meaningful information from the subtitles.
780
- This could include topics, key insights, or a breakdown of the content.
781
- """
782
- try:
783
- # Split text into sentences for better analysis
784
- sentences = text.split(". ")
785
-
786
- # Example: Extract key topics or keywords
787
- from collections import Counter
788
- words = text.split()
789
- common_words = Counter(words).most_common(10)
790
- key_topics = ", ".join([word for word, count in common_words])
791
-
792
- # Example: Provide a breakdown of the content
793
- info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
794
-
795
- return info
796
- except Exception as e:
797
- return f"Error extracting subtitle information: {str(e)}"
798
-
799
- ##########
800
  # def process_youtube_video(url="", keywords=""):
801
  # try:
802
  # # Initialize variables
@@ -828,19 +707,16 @@ def extract_subtitle_info(text):
828
  # if not text.strip():
829
  # raise ValueError("Transcript is empty")
830
 
831
- # # Clean up the text for sentiment analysis
832
- # cleaned_text = clean_text_for_analysis(text)
833
-
834
- # # Sentiment analysis
835
- # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
836
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
837
-
838
  # # Generate summary
839
  # model = genai.GenerativeModel("gemini-pro")
840
- # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
841
 
842
  # # Extract subtitle information
843
- # subtitle_info = extract_subtitle_info(cleaned_text)
 
 
 
 
844
 
845
  # except TranscriptsDisabled:
846
  # metadata = get_video_metadata(video_id)
@@ -869,6 +745,7 @@ def extract_subtitle_info(text):
869
  # """
870
  # Extracts the video ID from a YouTube URL.
871
  # """
 
872
  # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
873
  # return match.group(1) if match else None
874
 
@@ -878,7 +755,10 @@ def extract_subtitle_info(text):
878
  # Fetches video metadata such as title and description using the YouTube Data API.
879
  # """
880
  # try:
881
- # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
 
 
 
882
  # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
883
  # request = youtube.videos().list(part="snippet", id=video_id)
884
  # response = request.execute()
@@ -905,6 +785,7 @@ def extract_subtitle_info(text):
905
  # sentences = text.split(". ")
906
 
907
  # # Example: Extract key topics or keywords
 
908
  # words = text.split()
909
  # common_words = Counter(words).most_common(10)
910
  # key_topics = ", ".join([word for word, count in common_words])
@@ -916,23 +797,143 @@ def extract_subtitle_info(text):
916
  # except Exception as e:
917
  # return f"Error extracting subtitle information: {str(e)}"
918
 
 
 
 
 
 
 
 
 
 
919
 
920
- # def clean_text_for_analysis(text):
921
- # """
922
- # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
923
- # """
924
- # # Remove extra spaces and line breaks
925
- # cleaned_text = " ".join(text.split())
926
- # return cleaned_text
927
 
 
 
 
928
 
929
- # def get_recommendations(keywords):
930
- # """
931
- # Fetches related video recommendations based on the provided keywords.
932
- # This function can be expanded with a proper API or custom logic.
933
- # """
934
- # # Placeholder for fetching recommendations based on keywords
935
- # return f"Recommendations for: {keywords}" # Dummy return for now
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936
 
937
 
938
 
 
456
  subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
457
  subprocess.check_call(["pip", "install", "genai"])
458
  subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
459
+ subprocess.check_call(["pip", "install", "google-api-python-client"])
460
  import transformers
461
  import torch
462
  import os
 
676
  import re
677
  from collections import Counter
678
  from googleapiclient.discovery import build
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
  # def process_youtube_video(url="", keywords=""):
680
  # try:
681
  # # Initialize variables
 
707
  # if not text.strip():
708
  # raise ValueError("Transcript is empty")
709
 
 
 
 
 
 
 
 
710
  # # Generate summary
711
  # model = genai.GenerativeModel("gemini-pro")
712
+ # summary = model.generate_content(f"Summarize this: {text[:4000]}").text
713
 
714
  # # Extract subtitle information
715
+ # subtitle_info = extract_subtitle_info(text)
716
+
717
+ # # Sentiment analysis
718
+ # sentiment = TextBlob(text[:1000]).sentiment
719
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
720
 
721
  # except TranscriptsDisabled:
722
  # metadata = get_video_metadata(video_id)
 
745
  # """
746
  # Extracts the video ID from a YouTube URL.
747
  # """
748
+ # import re
749
  # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
750
  # return match.group(1) if match else None
751
 
 
755
  # Fetches video metadata such as title and description using the YouTube Data API.
756
  # """
757
  # try:
758
+ # from googleapiclient.discovery import build
759
+
760
+ # # Replace with your YouTube Data API key
761
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
762
  # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
763
  # request = youtube.videos().list(part="snippet", id=video_id)
764
  # response = request.execute()
 
785
  # sentences = text.split(". ")
786
 
787
  # # Example: Extract key topics or keywords
788
+ # from collections import Counter
789
  # words = text.split()
790
  # common_words = Counter(words).most_common(10)
791
  # key_topics = ", ".join([word for word, count in common_words])
 
797
  # except Exception as e:
798
  # return f"Error extracting subtitle information: {str(e)}"
799
 
800
+ ##########
801
def process_youtube_video(url="", keywords=""):
    """
    Analyse a YouTube video and return the values displayed by the UI.

    The English transcript is fetched, whitespace-normalised, scored for
    sentiment with TextBlob, summarised with the Gemini model and passed to
    ``extract_subtitle_info``. When the video has no usable transcript the
    video description from ``get_video_metadata`` is used as the summary.

    Args:
        url: YouTube video URL. ``None`` or a blank string is treated as
            "no URL given".
        keywords: Optional keywords forwarded to ``get_recommendations``.
            ``None`` or a blank string means no recommendations.

    Returns:
        A 5-tuple ``(thumbnail, summary, sentiment_label, subtitle_info,
        recommendations)``. This function does not raise: every failure is
        reported through the ``summary`` slot as a message string.
    """
    try:
        # Defaults used when a later step cannot provide a value.
        thumbnail = None
        summary = "No transcript available"
        sentiment_label = "N/A"
        recommendations = ""
        subtitle_info = "No additional information available"

        # `url or ""` lets a None coming from the UI behave like an empty box
        # instead of failing on `.strip()`.
        if not (url or "").strip():
            return None, "Please enter a YouTube URL", "N/A", "", ""

        video_id = extract_video_id(url)
        if not video_id:
            return None, "Invalid YouTube URL", "N/A", "", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            # Fetch transcript
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except NoTranscriptFound:
                # Only a missing transcript should trigger the fallback; a bare
                # `except:` here would also hide unrelated failures and
                # KeyboardInterrupt/SystemExit.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join([t['text'] for t in transcript.fetch()])
            if not text.strip():
                raise ValueError("Transcript is empty")

            # Clean up the text for sentiment analysis
            cleaned_text = clean_text_for_analysis(text)

            # Sentiment analysis on the cleaned text
            polarity = TextBlob(cleaned_text).sentiment.polarity
            if polarity > 0:
                tone = "Positive"
            elif polarity < 0:
                tone = "Negative"
            else:
                tone = "Neutral"
            sentiment_label = f"{tone} ({polarity:.2f})"

            # Generate summary (only the first 4000 characters are sent)
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text

            # Extract subtitle information
            subtitle_info = extract_subtitle_info(cleaned_text)

        except TranscriptsDisabled:
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except NoTranscriptFound:
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ No English transcript available.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except Exception as e:
            return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""

        # Get recommendations; a None value must not discard the results
        # computed above, so it is treated as "no keywords".
        if (keywords or "").strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, subtitle_info, recommendations

    except Exception as e:
        return None, f"Error: {str(e)}", "N/A", "", ""
867
+
868
+
869
def extract_video_id(url):
    """
    Extracts the video ID from a YouTube URL.

    The ID is the run of exactly 11 characters from ``[0-9A-Za-z_-]`` that
    directly follows ``v=`` or a ``/``. The run must end there: a longer
    segment (a host name such as ``youtube-nocookie.com`` or a channel ID)
    is skipped instead of being truncated to its first 11 characters.

    Args:
        url: The URL text to search.

    Returns:
        The 11-character ID of the first match, or ``None`` when nothing
        matches or ``url`` is not a string.
    """
    if not isinstance(url, str):
        return None
    # The negative lookahead rejects candidates that are merely the prefix of
    # a longer token.
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
875
+
876
+
877
def get_video_metadata(video_id):
    """
    Fetches video metadata such as title and description using the YouTube Data API.

    Args:
        video_id: ID of the video to look up.

    Returns:
        ``{"title": ..., "description": ...}`` for the first item returned by
        the API, ``{}`` when the API returns no items, or a dict whose title
        is ``"Error fetching metadata"`` and whose description is the error
        text when the lookup fails. This function does not raise.
    """
    import os  # local import so the block does not depend on file-level imports

    try:
        # NOTE(security): an API key committed to source control is public.
        # The YOUTUBE_API_KEY environment variable takes precedence; the
        # literal below is kept only so existing deployments keep working and
        # should be rotated and removed.
        api_key = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
        youtube = build("youtube", "v3", developerKey=api_key)
        request = youtube.videos().list(part="snippet", id=video_id)
        response = request.execute()

        if "items" in response and len(response["items"]) > 0:
            snippet = response["items"][0]["snippet"]
            return {
                "title": snippet.get("title", "No title available"),
                "description": snippet.get("description", "No description available"),
            }
        return {}

    except Exception as e:
        return {"title": "Error fetching metadata", "description": str(e)}
897
+
898
+
899
def extract_subtitle_info(text):
    """
    Builds a short statistical breakdown of subtitle text.

    The "key topics" are simply the ten most frequent whitespace-separated
    tokens (case and punctuation are kept as-is). Sentences are the
    non-empty pieces obtained by splitting on ``". "``.

    Args:
        text: The subtitle/transcript text.

    Returns:
        A three-line report with the key topics, the sentence count and the
        word count, or an ``"Error extracting subtitle information: ..."``
        message if the text cannot be processed (e.g. it is not a string).
    """
    try:
        # Drop empty pieces so that "" counts as 0 sentences and a trailing
        # ". " does not add a phantom sentence.
        sentences = [piece for piece in text.split(". ") if piece.strip()]

        # Most frequent tokens stand in for "topics".
        words = text.split()
        common_words = Counter(words).most_common(10)
        key_topics = ", ".join(word for word, _count in common_words)

        return f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
    except Exception as e:
        return f"Error extracting subtitle information: {str(e)}"
919
+
920
+
921
def clean_text_for_analysis(text):
    """
    Normalises whitespace in the transcript text.

    Every run of whitespace (spaces, tabs, line breaks) becomes a single
    space, and leading/trailing whitespace is dropped. Nothing else is
    removed from the text.
    """
    # split() with no argument discards all whitespace runs, including at the ends
    tokens = text.split()
    return " ".join(tokens)
928
+
929
+
930
def get_recommendations(keywords):
    """
    Returns recommendation text for the given keywords.

    This is currently a stub: it performs no lookup and only echoes the
    keywords back in a fixed message.
    """
    # Placeholder output until a real recommendation source is wired in
    message_template = "Recommendations for: {}"
    return message_template.format(keywords)
937
 
938
 
939