Spaces:
Sleeping
Sleeping
Create backend.py
Browse files- backend.py +104 -0
backend.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# backend.py
|
| 3 |
+
|
| 4 |
+
import spacy
|
| 5 |
+
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
| 6 |
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
|
| 7 |
+
from googleapiclient.discovery import build
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from wordcloud import WordCloud
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import re
|
| 12 |
+
|
| 13 |
+
# Initialize Spacy and VADER
# Small English pipeline used for sentence segmentation in
# split_long_sentences(); the model must be downloaded beforehand
# (python -m spacy download en_core_web_sm) or this raises at import time.
nlp = spacy.load("en_core_web_sm")
sia = SentimentIntensityAnalyzer()  # VADER scorer, shared module-level singleton

# YouTube Data API key
# NOTE(review): placeholder value -- must be replaced with a real key before
# fetch_video_metadata() can work; consider loading it from an env var
# instead of hard-coding it in source.
YOUTUBE_API_KEY = "YOUR_YOUTUBE_API_KEY"
|
| 19 |
+
|
| 20 |
+
# Fetch metadata of YouTube Video
|
| 21 |
+
def fetch_video_metadata(video_url):
    """Fetch channel, title, statistics and publish date for a YouTube video.

    Parameters
    ----------
    video_url : str
        A YouTube watch URL (also accepts youtu.be / embed links, or a
        bare 11-character video ID).

    Returns
    -------
    tuple
        (metadata dict, None) on success, or (None, error message string)
        on failure -- callers must check the second element.
    """
    # Robustly pull the 11-char video ID: the old ``split('v=')[-1]`` kept
    # trailing query params (e.g. "...&t=42s") and failed on youtu.be links.
    match = re.search(r"(?:v=|youtu\.be/|embed/)([A-Za-z0-9_-]{11})", video_url)
    video_id = match.group(1) if match else video_url.split('v=')[-1]

    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)

    try:
        request = youtube.videos().list(part="snippet,statistics", id=video_id)
        response = request.execute()

        # The API returns an empty 'items' list (not an error) for unknown
        # IDs; indexing [0] blindly would raise IndexError.
        items = response.get('items', [])
        if not items:
            return None, "Video not found."

        video_data = items[0]
        metadata = {
            "channel_name": video_data['snippet']['channelTitle'],
            "video_title": video_data['snippet']['title'],
            "views": video_data['statistics']['viewCount'],
            # Like/dislike counts may be hidden by the uploader.
            "likes": video_data['statistics'].get('likeCount', 'N/A'),
            "dislikes": video_data['statistics'].get('dislikeCount', 'N/A'),
            "posted_date": video_data['snippet']['publishedAt']
        }

        return metadata, None
    except VideoUnavailable:
        return None, "Video is unavailable."
    except Exception as e:
        return None, str(e)
|
| 44 |
+
|
| 45 |
+
# Fetch the transcript for YouTube Video
|
| 46 |
+
def fetch_transcript(video_url):
    """Download and flatten the transcript of a YouTube video.

    Parameters
    ----------
    video_url : str
        A YouTube watch URL (also accepts youtu.be / embed links, or a
        bare 11-character video ID).

    Returns
    -------
    tuple
        (transcript text, None) on success, or (None, error message
        string) when the transcript cannot be retrieved.
    """
    # Robustly pull the 11-char video ID: the old ``split('v=')[-1]`` kept
    # trailing query params (e.g. "...&t=42s") and failed on youtu.be links.
    match = re.search(r"(?:v=|youtu\.be/|embed/)([A-Za-z0-9_-]{11})", video_url)
    video_id = match.group(1) if match else video_url.split('v=')[-1]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Each transcript entry is a dict with a 'text' field; join the
        # snippets into one continuous string.
        text = " ".join(t['text'] for t in transcript)
        return text, None
    except (TranscriptsDisabled, VideoUnavailable):
        return None, "Transcript not available for this video."
    except Exception as e:
        return None, str(e)
|
| 56 |
+
|
| 57 |
+
# Split long sentences into chunks for better processing
|
| 58 |
+
def split_long_sentences(text):
    """Segment *text* into sentences, chunking any sentence over 25 words.

    Sentences produced by the Spacy pipeline that exceed 25 words are
    broken further: a chunk ends at terminal punctuation (. ! ?) or right
    after a coordinating word ("and", "but", "because", "so") once the
    chunk already holds more than 3 tokens. Shorter spans help the
    downstream per-sentence analysis.

    Returns a list of sentence/chunk strings.
    """
    terminals = {".", "!", "?"}
    break_words = {"and", "but", "because", "so"}
    results = []

    for sentence in nlp(text).sents:
        # Short sentences pass through untouched.
        if len(sentence.text.split()) <= 25:
            results.append(sentence.text.strip())
            continue

        chunks = []
        buffer = []
        for token in sentence:
            buffer.append(token.text)
            ends_here = False
            if token.is_punct and token.text in terminals:
                ends_here = True
            elif token.text.lower() in break_words and len(buffer) > 3:
                # The break word stays at the END of the current chunk.
                ends_here = True
            if ends_here:
                chunks.append(" ".join(buffer).strip())
                buffer = []

        # Flush whatever tokens remain after the last break point.
        if buffer:
            chunks.append(" ".join(buffer).strip())
        results.extend(chunks)

    return results
|
| 83 |
+
|
| 84 |
+
# Read the keywords from the provided Excel file
|
| 85 |
+
def read_keywords(file_path):
    """Load keyword lists from an Excel sheet.

    Each column header is treated as an attribute name; the non-empty
    cells beneath it become that attribute's keyword list.

    Parameters
    ----------
    file_path : str or path-like
        Path to the Excel workbook to read.

    Returns
    -------
    tuple
        (keywords, attributes): a dict mapping each attribute to its
        keyword list, and the attribute names in sheet column order.
    """
    frame = pd.read_excel(file_path)

    attribute_names = list(frame.columns)
    # Drop blank cells so each list holds only real keywords.
    keyword_map = {
        column: frame[column].dropna().tolist()
        for column in attribute_names
    }

    return keyword_map, attribute_names
|
| 95 |
+
|
| 96 |
+
# Match keywords with sentences
|
| 97 |
+
def match_keywords_in_sentences(sentences, keywords):
    """Group sentences by the keyword attributes they mention.

    Parameters
    ----------
    sentences : list of str
        Sentences to scan.
    keywords : dict
        Maps an attribute name to a list of keywords indicating it.

    Returns
    -------
    dict
        For every attribute, the sentences containing at least one of its
        keywords (case-insensitive substring match). Each sentence appears
        at most once per attribute -- the original appended one duplicate
        per matching keyword, inflating downstream counts.
    """
    # Lower-case keywords once, instead of once per sentence; str() guards
    # against non-string cells (e.g. numbers) read from the Excel sheet.
    lowered = {
        attribute: [str(keyword).lower() for keyword in sub_keywords]
        for attribute, sub_keywords in keywords.items()
    }

    matched_keywords = {attribute: [] for attribute in keywords}
    for sentence in sentences:
        sentence_lower = sentence.lower()
        for attribute, sub_keywords in lowered.items():
            # any() short-circuits, so a sentence is recorded once even if
            # several of the attribute's keywords occur in it.
            if any(keyword in sentence_lower for keyword in sub_keywords):
                matched_keywords[attribute].append(sentence)
    return matched_keywords
|