Sayiqa committed on
Commit
4e7cbd0
·
verified ·
1 Parent(s): d1e6180

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -146
app.py CHANGED
@@ -675,128 +675,6 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
675
  import re
676
  from collections import Counter
677
  from googleapiclient.discovery import build
678
- # def process_youtube_video(url="", keywords=""):
679
- # try:
680
- # # Initialize variables
681
- # thumbnail = None
682
- # summary = "No transcript available"
683
- # sentiment_label = "N/A"
684
- # recommendations = ""
685
- # subtitle_info = "No additional information available"
686
-
687
- # if not url.strip():
688
- # return None, "Please enter a YouTube URL", "N/A", "", ""
689
-
690
- # video_id = extract_video_id(url)
691
- # if not video_id:
692
- # return None, "Invalid YouTube URL", "N/A", "", ""
693
-
694
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
695
-
696
- # try:
697
- # # Fetch transcript
698
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
699
- # transcript = None
700
- # try:
701
- # transcript = transcript_list.find_transcript(['en'])
702
- # except:
703
- # transcript = transcript_list.find_generated_transcript(['en'])
704
-
705
- # text = " ".join([t['text'] for t in transcript.fetch()])
706
- # if not text.strip():
707
- # raise ValueError("Transcript is empty")
708
-
709
- # # Generate summary
710
- # model = genai.GenerativeModel("gemini-pro")
711
- # summary = model.generate_content(f"Summarize this: {text[:4000]}").text
712
-
713
- # # Extract subtitle information
714
- # subtitle_info = extract_subtitle_info(text)
715
-
716
- # # Sentiment analysis
717
- # sentiment = TextBlob(text[:1000]).sentiment
718
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
719
-
720
- # except TranscriptsDisabled:
721
- # metadata = get_video_metadata(video_id)
722
- # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
723
- # sentiment_label = "N/A"
724
- # subtitle_info = "No subtitles available for analysis."
725
- # except NoTranscriptFound:
726
- # metadata = get_video_metadata(video_id)
727
- # summary = metadata.get("description", "⚠️ No English transcript available.")
728
- # sentiment_label = "N/A"
729
- # subtitle_info = "No subtitles available for analysis."
730
- # except Exception as e:
731
- # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
732
-
733
- # # Get recommendations
734
- # if keywords.strip():
735
- # recommendations = get_recommendations(keywords)
736
-
737
- # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
738
-
739
- # except Exception as e:
740
- # return None, f"Error: {str(e)}", "N/A", "", ""
741
-
742
-
743
- # def extract_video_id(url):
744
- # """
745
- # Extracts the video ID from a YouTube URL.
746
- # """
747
- # import re
748
- # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
749
- # return match.group(1) if match else None
750
-
751
-
752
- # def get_video_metadata(video_id):
753
- # """
754
- # Fetches video metadata such as title and description using the YouTube Data API.
755
- # """
756
- # try:
757
- # from googleapiclient.discovery import build
758
-
759
- # # Replace with your YouTube Data API key
760
- # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
761
- # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
762
- # request = youtube.videos().list(part="snippet", id=video_id)
763
- # response = request.execute()
764
-
765
- # if "items" in response and len(response["items"]) > 0:
766
- # snippet = response["items"][0]["snippet"]
767
- # return {
768
- # "title": snippet.get("title", "No title available"),
769
- # "description": snippet.get("description", "No description available"),
770
- # }
771
- # return {}
772
-
773
- # except Exception as e:
774
- # return {"title": "Error fetching metadata", "description": str(e)}
775
-
776
-
777
- # def extract_subtitle_info(text):
778
- # """
779
- # Extracts meaningful information from the subtitles.
780
- # This could include topics, key insights, or a breakdown of the content.
781
- # """
782
- # try:
783
- # # Split text into sentences for better analysis
784
- # sentences = text.split(". ")
785
-
786
- # # Example: Extract key topics or keywords
787
- # from collections import Counter
788
- # words = text.split()
789
- # common_words = Counter(words).most_common(10)
790
- # key_topics = ", ".join([word for word, count in common_words])
791
-
792
- # # Example: Provide a breakdown of the content
793
- # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
794
-
795
- # return info
796
- # except Exception as e:
797
- # return f"Error extracting subtitle information: {str(e)}"
798
-
799
-
800
  def process_youtube_video(url="", keywords=""):
801
  try:
802
  # Initialize variables
@@ -828,19 +706,16 @@ def process_youtube_video(url="", keywords=""):
828
  if not text.strip():
829
  raise ValueError("Transcript is empty")
830
 
831
- # Clean up the text for sentiment analysis
832
- cleaned_text = clean_text_for_analysis(text)
833
-
834
- # Sentiment analysis
835
- sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
836
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
837
-
838
  # Generate summary
839
  model = genai.GenerativeModel("gemini-pro")
840
- summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
841
 
842
  # Extract subtitle information
843
- subtitle_info = extract_subtitle_info(cleaned_text)
 
 
 
 
844
 
845
  except TranscriptsDisabled:
846
  metadata = get_video_metadata(video_id)
@@ -869,6 +744,7 @@ def extract_video_id(url):
869
  """
870
  Extracts the video ID from a YouTube URL.
871
  """
 
872
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
873
  return match.group(1) if match else None
874
 
@@ -878,7 +754,10 @@ def get_video_metadata(video_id):
878
  Fetches video metadata such as title and description using the YouTube Data API.
879
  """
880
  try:
881
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
 
 
 
882
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
883
  request = youtube.videos().list(part="snippet", id=video_id)
884
  response = request.execute()
@@ -905,6 +784,7 @@ def extract_subtitle_info(text):
905
  sentences = text.split(". ")
906
 
907
  # Example: Extract key topics or keywords
 
908
  words = text.split()
909
  common_words = Counter(words).most_common(10)
910
  key_topics = ", ".join([word for word, count in common_words])
@@ -916,23 +796,143 @@ def extract_subtitle_info(text):
916
  except Exception as e:
917
  return f"Error extracting subtitle information: {str(e)}"
918
 
 
 
 
 
 
 
 
 
 
919
 
920
- def clean_text_for_analysis(text):
921
- """
922
- Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
923
- """
924
- # Remove extra spaces and line breaks
925
- cleaned_text = " ".join(text.split())
926
- return cleaned_text
927
 
 
 
 
928
 
929
- def get_recommendations(keywords):
930
- """
931
- Fetches related video recommendations based on the provided keywords.
932
- This function can be expanded with a proper API or custom logic.
933
- """
934
- # Placeholder for fetching recommendations based on keywords
935
- return f"Recommendations for: {keywords}" # Dummy return for now
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936
 
937
 
938
 
 
675
  import re
676
  from collections import Counter
677
  from googleapiclient.discovery import build
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
  def process_youtube_video(url="", keywords=""):
679
  try:
680
  # Initialize variables
 
706
  if not text.strip():
707
  raise ValueError("Transcript is empty")
708
 
 
 
 
 
 
 
 
709
  # Generate summary
710
  model = genai.GenerativeModel("gemini-pro")
711
+ summary = model.generate_content(f"Summarize this: {text[:4000]}").text
712
 
713
  # Extract subtitle information
714
+ subtitle_info = extract_subtitle_info(text)
715
+
716
+ # Sentiment analysis
717
+ sentiment = TextBlob(text[:1000]).sentiment
718
+ sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
719
 
720
  except TranscriptsDisabled:
721
  metadata = get_video_metadata(video_id)
 
744
  """
745
  Extracts the video ID from a YouTube URL.
746
  """
747
+ import re
748
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
749
  return match.group(1) if match else None
750
 
 
754
  Fetches video metadata such as title and description using the YouTube Data API.
755
  """
756
  try:
757
+ from googleapiclient.discovery import build
758
+
759
+ # Replace with your YouTube Data API key
760
+ YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
761
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
762
  request = youtube.videos().list(part="snippet", id=video_id)
763
  response = request.execute()
 
784
  sentences = text.split(". ")
785
 
786
  # Example: Extract key topics or keywords
787
+ from collections import Counter
788
  words = text.split()
789
  common_words = Counter(words).most_common(10)
790
  key_topics = ", ".join([word for word, count in common_words])
 
796
  except Exception as e:
797
  return f"Error extracting subtitle information: {str(e)}"
798
 
799
+ ##########
800
+ # def process_youtube_video(url="", keywords=""):
801
+ # try:
802
+ # # Initialize variables
803
+ # thumbnail = None
804
+ # summary = "No transcript available"
805
+ # sentiment_label = "N/A"
806
+ # recommendations = ""
807
+ # subtitle_info = "No additional information available"
808
 
809
+ # if not url.strip():
810
+ # return None, "Please enter a YouTube URL", "N/A", "", ""
 
 
 
 
 
811
 
812
+ # video_id = extract_video_id(url)
813
+ # if not video_id:
814
+ # return None, "Invalid YouTube URL", "N/A", "", ""
815
 
816
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
817
+
818
+ # try:
819
+ # # Fetch transcript
820
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
821
+ # transcript = None
822
+ # try:
823
+ # transcript = transcript_list.find_transcript(['en'])
824
+ # except:
825
+ # transcript = transcript_list.find_generated_transcript(['en'])
826
+
827
+ # text = " ".join([t['text'] for t in transcript.fetch()])
828
+ # if not text.strip():
829
+ # raise ValueError("Transcript is empty")
830
+
831
+ # # Clean up the text for sentiment analysis
832
+ # cleaned_text = clean_text_for_analysis(text)
833
+
834
+ # # Sentiment analysis
835
+ # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
836
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
837
+
838
+ # # Generate summary
839
+ # model = genai.GenerativeModel("gemini-pro")
840
+ # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
841
+
842
+ # # Extract subtitle information
843
+ # subtitle_info = extract_subtitle_info(cleaned_text)
844
+
845
+ # except TranscriptsDisabled:
846
+ # metadata = get_video_metadata(video_id)
847
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
848
+ # sentiment_label = "N/A"
849
+ # subtitle_info = "No subtitles available for analysis."
850
+ # except NoTranscriptFound:
851
+ # metadata = get_video_metadata(video_id)
852
+ # summary = metadata.get("description", "⚠️ No English transcript available.")
853
+ # sentiment_label = "N/A"
854
+ # subtitle_info = "No subtitles available for analysis."
855
+ # except Exception as e:
856
+ # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
857
+
858
+ # # Get recommendations
859
+ # if keywords.strip():
860
+ # recommendations = get_recommendations(keywords)
861
+
862
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
863
+
864
+ # except Exception as e:
865
+ # return None, f"Error: {str(e)}", "N/A", "", ""
866
+
867
+
868
+ # def extract_video_id(url):
869
+ # """
870
+ # Extracts the video ID from a YouTube URL.
871
+ # """
872
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
873
+ # return match.group(1) if match else None
874
+
875
+
876
+ # def get_video_metadata(video_id):
877
+ # """
878
+ # Fetches video metadata such as title and description using the YouTube Data API.
879
+ # """
880
+ # try:
881
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
882
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
883
+ # request = youtube.videos().list(part="snippet", id=video_id)
884
+ # response = request.execute()
885
+
886
+ # if "items" in response and len(response["items"]) > 0:
887
+ # snippet = response["items"][0]["snippet"]
888
+ # return {
889
+ # "title": snippet.get("title", "No title available"),
890
+ # "description": snippet.get("description", "No description available"),
891
+ # }
892
+ # return {}
893
+
894
+ # except Exception as e:
895
+ # return {"title": "Error fetching metadata", "description": str(e)}
896
+
897
+
898
+ # def extract_subtitle_info(text):
899
+ # """
900
+ # Extracts meaningful information from the subtitles.
901
+ # This could include topics, key insights, or a breakdown of the content.
902
+ # """
903
+ # try:
904
+ # # Split text into sentences for better analysis
905
+ # sentences = text.split(". ")
906
+
907
+ # # Example: Extract key topics or keywords
908
+ # words = text.split()
909
+ # common_words = Counter(words).most_common(10)
910
+ # key_topics = ", ".join([word for word, count in common_words])
911
+
912
+ # # Example: Provide a breakdown of the content
913
+ # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
914
+
915
+ # return info
916
+ # except Exception as e:
917
+ # return f"Error extracting subtitle information: {str(e)}"
918
+
919
+
920
+ # def clean_text_for_analysis(text):
921
+ # """
922
+ # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
923
+ # """
924
+ # # Remove extra spaces and line breaks
925
+ # cleaned_text = " ".join(text.split())
926
+ # return cleaned_text
927
+
928
+
929
+ # def get_recommendations(keywords):
930
+ # """
931
+ # Fetches related video recommendations based on the provided keywords.
932
+ # This function can be expanded with a proper API or custom logic.
933
+ # """
934
+ # # Placeholder for fetching recommendations based on keywords
935
+ # return f"Recommendations for: {keywords}" # Dummy return for now
936
 
937
 
938