temo12 committed on
Commit
3f160eb
·
verified ·
1 Parent(s): fcecf5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py CHANGED
@@ -1,3 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
  def process_keywords_and_video(url, excel_file):
 
1
# NOTE(review): the original file used IPython shell magics here:
#   !pip install spacy vaderSentiment youtube-transcript-api gradio pandas fpdf openpyxl google-api-python-client wordcloud matplotlib
#   !python -m spacy download en_core_web_sm
# A leading `!` is a SyntaxError in a plain .py file, so these lines broke the
# module at import time. Declare the packages in requirements.txt (and the
# spacy model download in a build step) instead; kept here as comments so the
# exact install commands are not lost.
+
4
+
5
import os
import re

import matplotlib.pyplot as plt
import pandas as pd
import spacy
from fpdf import FPDF
from googleapiclient.discovery import build
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
14
+
15
# Initialize Spacy and VADER once at import time (both are expensive to build).
nlp = spacy.load("en_core_web_sm")
sia = SentimentIntensityAnalyzer()

# YouTube Data API key.
# SECURITY: the original file hard-coded a live API key here and committed it
# to a public repository — that key must be considered leaked and should be
# revoked. Supply a fresh key through the environment (e.g. a Spaces secret)
# instead of embedding it in source.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
21
+
22
+
23
def fetch_video_metadata(video_url):
    """Fetch channel/title/view/like metadata for a YouTube video.

    Returns a ``(metadata_dict, error_message)`` pair; exactly one of the
    two is ``None``.
    """
    # Robust ID extraction: the original `split('v=')[-1]` broke on
    # youtu.be/embed/shorts links and kept trailing query params (&t=42s).
    match = re.search(r"(?:v=|youtu\.be/|embed/|shorts/)([A-Za-z0-9_-]{11})", video_url)
    video_id = match.group(1) if match else video_url.split('v=')[-1].split('&')[0]

    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)

    try:
        request = youtube.videos().list(part="snippet,statistics", id=video_id)
        response = request.execute()

        items = response.get('items', [])
        if not items:
            # The original indexed response['items'][0] directly, which raised
            # a bare IndexError for unknown/private IDs instead of a message.
            return None, "No video found for this URL."

        video_data = items[0]
        metadata = {
            "channel_name": video_data['snippet']['channelTitle'],
            "video_title": video_data['snippet']['title'],
            "views": video_data['statistics']['viewCount'],
            # like/dislike counts can be hidden by the uploader.
            "likes": video_data['statistics'].get('likeCount', 'N/A'),
            "dislikes": video_data['statistics'].get('dislikeCount', 'N/A'),
            "posted_date": video_data['snippet']['publishedAt'],
        }
        return metadata, None

    except VideoUnavailable:
        return None, "Video is unavailable."
    except Exception as e:
        return None, str(e)
49
+
50
+
51
def fetch_transcript(video_url):
    """Download the full transcript text for a YouTube video.

    Returns a ``(transcript_text, error_message)`` pair; exactly one of
    the two is ``None``.
    """
    # Robust ID extraction: the original `split('v=')[-1]` broke on
    # youtu.be/embed/shorts links and kept trailing query params (&t=42s).
    match = re.search(r"(?:v=|youtu\.be/|embed/|shorts/)([A-Za-z0-9_-]{11})", video_url)
    video_id = match.group(1) if match else video_url.split('v=')[-1].split('&')[0]

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join(entry['text'] for entry in transcript)
        return text, None

    except (TranscriptsDisabled, VideoUnavailable):
        return None, "Transcript not available for this video."
    except Exception as e:
        return None, str(e)
63
+
64
+
65
+
66
def split_long_sentences(text):
    """Segment *text* into sentences, sub-splitting any sentence over 25 words.

    Spacy supplies the base sentence boundaries. A long sentence is chopped
    at terminal punctuation (., !, ?) and at connector words
    (and/but/because/so) once the running chunk holds more than three tokens.
    Returns a list of stripped sentence strings.
    """
    result = []

    for sent in nlp(text).sents:
        # Short sentences pass through untouched.
        if len(sent.text.split()) <= 25:
            result.append(sent.text.strip())
            continue

        pieces = []
        chunk = []
        for token in sent:
            chunk.append(token.text)
            ends_sentence = token.is_punct and token.text in {".", "!", "?"}
            is_connector = token.text.lower() in {"and", "but", "because", "so"}
            # Connectors only flush once the chunk is long enough to stand alone.
            if ends_sentence or (is_connector and len(chunk) > 3):
                pieces.append(" ".join(chunk).strip())
                chunk = []

        if chunk:  # trailing tokens after the last flush point
            pieces.append(" ".join(chunk).strip())

        result.extend(pieces)

    return result
92
+
93
def read_keywords(file_path):
    """Load keyword lists from an Excel sheet.

    Each column header is an attribute name; the non-empty cells beneath it
    are that attribute's keywords. Returns ``(keywords_dict, attribute_list)``.
    """
    frame = pd.read_excel(file_path)
    attributes = list(frame.columns)
    keywords = {column: frame[column].dropna().tolist() for column in attributes}
    return keywords, attributes
103
+
104
+
105
def match_keywords_in_sentences(sentences, keywords):
    """Group sentences under each attribute whose keywords they contain.

    Matching is case-insensitive substring containment. A sentence is
    recorded once per keyword hit (a sentence containing two of an
    attribute's keywords appears twice under it) — this duplicate behaviour
    matches the original and is preserved for downstream consumers.

    Keywords are coerced with ``str()`` before matching: Excel columns often
    contain numeric cells, and the original ``keyword.lower()`` raised
    AttributeError on float/int keywords.
    """
    matched_keywords = {attribute: [] for attribute in keywords}

    for sentence in sentences:
        # Hoisted: the original recomputed sentence.lower() for every keyword.
        lowered = sentence.lower()
        for attribute, sub_keywords in keywords.items():
            for keyword in sub_keywords:
                if str(keyword).lower() in lowered:
                    matched_keywords[attribute].append(sentence)

    return matched_keywords
115
+
116
+
117
def analyze_sentiment_for_keywords(matched_keywords, sentences):
    """Split each attribute's matched lines into positive/negative buckets.

    Uses VADER compound scores with the conventional +/-0.05 thresholds;
    lines scoring inside (-0.05, 0.05] are treated as neutral and dropped.
    Each bucket entry is a ``(stripped_line, compound_score)`` tuple.

    ``sentences`` is accepted for call-site compatibility but is not
    consulted here.
    """
    sentiment_results = {}

    for attribute, lines in matched_keywords.items():
        buckets = {"positive": [], "negative": []}

        for raw_line in lines:
            compound = sia.polarity_scores(raw_line)['compound']
            if compound > 0.05:
                buckets["positive"].append((raw_line.strip(), compound))
            elif compound < -0.05:
                buckets["negative"].append((raw_line.strip(), compound))

        sentiment_results[attribute] = buckets

    return sentiment_results
137
+
138
+
139
def generate_word_clouds(matched_keywords):
    """Build (and display) a WordCloud per attribute from its matched sentences.

    Returns ``{attribute: WordCloud}``. Attributes with no matched text are
    skipped: ``WordCloud.generate("")`` raises ValueError, which crashed the
    original whenever an attribute had zero keyword hits.
    """
    wordclouds = {}

    for attribute, sentences_list in matched_keywords.items():
        text = " ".join(sentences_list).strip()
        if not text:
            continue  # nothing to render for this attribute

        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
        wordclouds[attribute] = wordcloud

        plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
        plt.title(f"Word Cloud for {attribute}")
        plt.show()
        # Close the figure so repeated calls don't leak matplotlib figures
        # (matplotlib warns after ~20 open figures in a long-lived process).
        plt.close()

    return wordclouds
155
+
156
+
157
+
158
def generate_pdf_with_sections(metadata, sentiment_results, wordclouds, output_file="Analysis_Report.pdf"):
    """Render the full analysis into a PDF report.

    Layout: a title/metadata page, then one page per attribute listing its
    positive and negative lines followed by its word-cloud image (when one
    exists). Returns the path of the written PDF.
    """

    def _latin1(text):
        # FPDF's built-in core fonts only support latin-1; YouTube titles and
        # transcript lines are frequently unicode, which made the original
        # pdf.output() raise UnicodeEncodeError. Replace unencodable chars.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Auto-Insight: YouTube Video Sentiment & Attribute Analysis Report", ln=True, align="C")
    pdf.ln(10)

    # Metadata section
    if metadata:
        pdf.set_font("Arial", "B", 14)
        pdf.cell(0, 10, "Video Metadata", ln=True)
        pdf.set_font("Arial", size=12)
        for key, value in metadata.items():
            pdf.cell(0, 10, _latin1(f"{key.replace('_', ' ').title()}: {value}"), ln=True)
        pdf.ln(10)

    # One section per attribute
    for attribute, sentiments in sentiment_results.items():
        pdf.add_page()
        pdf.set_font("Arial", "B", 14)
        pdf.cell(0, 10, _latin1(f"Attribute: {attribute}"), ln=True)
        pdf.ln(5)

        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "Positive Sentiments:", ln=True)
        pdf.set_font("Arial", size=12)
        for line, score in sentiments["positive"]:
            pdf.multi_cell(0, 10, _latin1(f"Line: {line}\nScore: {score}"))
            pdf.ln(2)

        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "Negative Sentiments:", ln=True)
        pdf.set_font("Arial", size=12)
        for line, score in sentiments["negative"]:
            pdf.multi_cell(0, 10, _latin1(f"Line: {line}\nScore: {score}"))
            pdf.ln(2)

        if attribute in wordclouds:
            image_path = f"{attribute}_wordcloud.png"
            # WordCloud writes the PNG directly — no need to round-trip through
            # a matplotlib figure as the original did.
            wordclouds[attribute].to_file(image_path)
            # Place the image at the current cursor instead of the original
            # hard-coded y=80, which overlapped the sentiment text above it.
            pdf.image(image_path, x=10, w=180)
            try:
                os.remove(image_path)  # cleanup — the original leaked these temp files
            except OSError:
                pass

    pdf.output(output_file)
    return output_file
210
+
211
+
212
+
213
+
214
  import gradio as gr
215
 
216
  def process_keywords_and_video(url, excel_file):