Update app.py
app.py
CHANGED
@@ -1,234 +1,6 @@
 
 
 
-import spacy
-from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
-from googleapiclient.discovery import build
-from fpdf import FPDF
-import pandas as pd
-import re
-from wordcloud import WordCloud
-import matplotlib.pyplot as plt
-
-# Initialize Spacy and VADER
-nlp = spacy.load("en_core_web_sm")
-sia = SentimentIntensityAnalyzer()
-
-# YouTube Data API key
-YOUTUBE_API_KEY = "AIzaSyBlI0XNuRAlG7WF3wlsiD5cUkIw7cmhER4"
-
-
-def fetch_video_metadata(video_url):
-    video_id = video_url.split('v=')[-1]
-
-    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
-
-    try:
-        request = youtube.videos().list(part="snippet,statistics", id=video_id)
-        response = request.execute()
-
-        video_data = response['items'][0]
-
-        metadata = {
-            "channel_name": video_data['snippet']['channelTitle'],
-            "video_title": video_data['snippet']['title'],
-            "views": video_data['statistics']['viewCount'],
-            "likes": video_data['statistics'].get('likeCount', 'N/A'),
-            "dislikes": video_data['statistics'].get('dislikeCount', 'N/A'),
-            "posted_date": video_data['snippet']['publishedAt']
-        }
-
-        return metadata, None
-
-    except VideoUnavailable:
-        return None, "Video is unavailable."
-    except Exception as e:
-        return None, str(e)
-
-
-def fetch_transcript(video_url):
-    video_id = video_url.split('v=')[-1]
-
-    try:
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-        text = " ".join([t['text'] for t in transcript])
-        return text, None
-
-    except (TranscriptsDisabled, VideoUnavailable):
-        return None, "Transcript not available for this video."
-    except Exception as e:
-        return None, str(e)
-
-
-
-def split_long_sentences(text):
-    doc = nlp(text)  # Tokenize into sentences using Spacy
-    sentences = []
-
-    for sent in doc.sents:
-        if len(sent.text.split()) > 25:
-            sub_sentences = []
-            current_chunk = []
-            for token in sent:
-                current_chunk.append(token.text)
-                if token.is_punct and token.text in {".", "!", "?"}:
-                    sub_sentences.append(" ".join(current_chunk).strip())
-                    current_chunk = []
-                elif token.text.lower() in {"and", "but", "because", "so"}:
-                    if len(current_chunk) > 3:
-                        sub_sentences.append(" ".join(current_chunk).strip())
-                        current_chunk = []
-
-            if current_chunk:
-                sub_sentences.append(" ".join(current_chunk).strip())
-
-            sentences.extend(sub_sentences)
-        else:
-            sentences.append(sent.text.strip())
-
-    return sentences
-
-def read_keywords(file_path):
-    df = pd.read_excel(file_path)
-
-    attributes = df.columns.tolist()
-    keywords = {}
-
-    for attribute in attributes:
-        keywords[attribute] = df[attribute].dropna().tolist()
-
-    return keywords, attributes
-
-
-def match_keywords_in_sentences(sentences, keywords):
-    matched_keywords = {attribute: [] for attribute in keywords}
-
-    for sentence in sentences:
-        for attribute, sub_keywords in keywords.items():
-            for keyword in sub_keywords:
-                if keyword.lower() in sentence.lower():
-                    matched_keywords[attribute].append(sentence)
-
-    return matched_keywords
-
-
-def analyze_sentiment_for_keywords(matched_keywords, sentences):
-    sentiment_results = {}
-
-    for attribute, sentences_list in matched_keywords.items():
-        positive_lines = []
-        negative_lines = []
-
-        for line in sentences_list:
-            sentiment = sia.polarity_scores(line)
-            if sentiment['compound'] > 0.05:
-                positive_lines.append((line.strip(), sentiment['compound']))
-            elif sentiment['compound'] < -0.05:
-                negative_lines.append((line.strip(), sentiment['compound']))
-
-        sentiment_results[attribute] = {
-            "positive": positive_lines,
-            "negative": negative_lines
-        }
-
-    return sentiment_results
-
-
-def generate_word_clouds(matched_keywords):
-    wordclouds = {}
-
-    for attribute, sentences_list in matched_keywords.items():
-        text = " ".join(sentences_list)
-
-        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
-        wordclouds[attribute] = wordcloud
-
-        plt.figure(figsize=(10, 5))
-        plt.imshow(wordcloud, interpolation='bilinear')
-        plt.axis("off")
-        plt.title(f"Word Cloud for {attribute}")
-        plt.show()
-
-    return wordclouds
-
-
-
-def generate_pdf_with_sections(metadata, sentiment_results, wordclouds, output_file="Analysis_Report.pdf"):
-    pdf = FPDF()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    pdf.add_page()
-    pdf.set_font("Arial", size=12)
-
-    # Add Metadata
-    pdf.set_font("Arial", "B", 16)
-    pdf.cell(200, 10, "Auto-Insight: YouTube Video Sentiment & Attribute Analysis Report", ln=True, align="C")
-    pdf.ln(10)
-
-    if metadata:
-        pdf.set_font("Arial", "B", 14)
-        pdf.cell(0, 10, "Video Metadata", ln=True)
-        pdf.set_font("Arial", size=12)
-        for key, value in metadata.items():
-            pdf.cell(0, 10, f"{key.replace('_', ' ').title()}: {value}", ln=True)
-        pdf.ln(10)
-
-    # Add Sections for Each Attribute
-    for attribute, sentiments in sentiment_results.items():
-        pdf.add_page()
-        pdf.set_font("Arial", "B", 14)
-        pdf.cell(0, 10, f"Attribute: {attribute}", ln=True)
-        pdf.ln(5)
-
-        # Add Positive Sentiments
-        pdf.set_font("Arial", "B", 12)
-        pdf.cell(0, 10, "Positive Sentiments:", ln=True)
-        pdf.set_font("Arial", size=12)
-        for line, score in sentiments["positive"]:
-            pdf.multi_cell(0, 10, f"Line: {line}\nScore: {score}")
-            pdf.ln(2)
-
-        # Add Negative Sentiments
-        pdf.set_font("Arial", "B", 12)
-        pdf.cell(0, 10, "Negative Sentiments:", ln=True)
-        pdf.set_font("Arial", size=12)
-        for line, score in sentiments["negative"]:
-            pdf.multi_cell(0, 10, f"Line: {line}\nScore: {score}")
-            pdf.ln(2)
-
-        # Add Word Cloud
-        if attribute in wordclouds:
-            plt.imshow(wordclouds[attribute], interpolation='bilinear')
-            plt.axis("off")
-            plt.savefig(f"{attribute}_wordcloud.png")
-            pdf.image(f"{attribute}_wordcloud.png", x=10, y=80, w=180)
-            plt.close()
-
-    pdf.output(output_file)
-    return output_file
-
-
-
-
-import gradio as gr
-
-def process_keywords_and_video(url, excel_file):
-    metadata, error = fetch_video_metadata(url)
-    if error:
-        return error, None
-
-    transcript, error = fetch_transcript(url)
-    if error:
-        return error, None
-
-    sentences = split_long_sentences(transcript)
-    keywords, attributes = read_keywords(excel_file)
-    matched_keywords = match_keywords_in_sentences(sentences, keywords)
-    sentiment_results = analyze_sentiment_for_keywords(matched_keywords, sentences)
-    wordclouds = generate_word_clouds(matched_keywords)
-    pdf_file = generate_pdf_with_sections(metadata, sentiment_results, wordclouds)
-
-    return "Processing completed successfully!", pdf_file
 
 # Gradio App
 with gr.Blocks() as iface:
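
The removed code chained these functions into a single pipeline: fetch metadata and transcript for a YouTube URL, split the transcript into sentences with spaCy, match per-attribute keyword lists read from an Excel sheet, score the matched sentences with VADER, and render everything into a PDF with word clouds. A minimal sketch of invoking that entry point, assuming the deleted definitions above are still in scope; the video URL and workbook path are hypothetical placeholders, not values from this repository:

# Hypothetical invocation of the removed pipeline; the URL and the
# keywords workbook (one column per attribute) are placeholders.
status, pdf_path = process_keywords_and_video(
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    "keywords.xlsx",
)
print(status)    # "Processing completed successfully!" or an error message
print(pdf_path)  # "Analysis_Report.pdf" on success, None on error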
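
The 0.05 cutoffs in analyze_sentiment_for_keywords are VADER's conventional neutral band on the compound score: above +0.05 counts as positive, below -0.05 as negative, and anything in between is dropped. A self-contained check of that thresholding (the two sample sentences are made up):

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
for line in ["The ride quality is great.", "The road noise is horrible."]:
    score = sia.polarity_scores(line)["compound"]  # compound score in [-1, 1]
    label = "positive" if score > 0.05 else "negative" if score < -0.05 else "neutral"
    print(f"{label:8} {score:+.3f}  {line}")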
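
As committed, the new app.py ends at the bare "with gr.Blocks() as iface:" line with no body, and the "import gradio as gr" statement was deleted along with everything else, so the remaining six lines do not run on their own. A minimal sketch of a Blocks stub consistent with the removed process_keywords_and_video signature; the component labels and wiring are assumptions, not code recovered from this repository:

import gradio as gr

# Sketch only: the original UI body is not visible in this diff. The
# components below are inferred from process_keywords_and_video, which
# took (url, excel_file) and returned (status_message, pdf_path).
with gr.Blocks() as iface:
    url = gr.Textbox(label="YouTube video URL")
    excel_file = gr.File(label="Keywords Excel file (.xlsx)")
    run = gr.Button("Analyze")
    status = gr.Textbox(label="Status")
    report = gr.File(label="PDF report")
    run.click(process_keywords_and_video, inputs=[url, excel_file], outputs=[status, report])

iface.launch()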