Spaces:
Build error
Build error
Upload folder using huggingface_hub
Browse files
- README.md +2 -8
- requirements.txt +4 -0
- scraper.py +69 -0
- youtube_comments_sentiment.csv +19 -0
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 4.19.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: brighter-india-yt-scraper
|
| 3 |
+
app_file: scraper.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 4.19.2
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pandas
|
| 3 |
+
transformers
|
| 4 |
+
google-api-python-client
|
scraper.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from googleapiclient.discovery import build
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
|
| 7 |
+
# Set up YouTube Data API credentials and initialize the client.
# The key is read from the environment instead of being embedded in the source
# file, so it is never published with the repository. On a hosted deployment,
# provide YOUTUBE_API_KEY as a secret / environment variable.
# NOTE(review): the key that was previously committed on this line should be
# treated as compromised and rotated.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; define it as an environment variable "
        "(or a Space secret) before starting the app."
    )
youtube = build('youtube', 'v3', developerKey=api_key)

# Initialize the sentiment analysis pipeline once at import time so every
# request reuses the same loaded model. No model name is given, so the
# library's default model for the "sentiment-analysis" task is used.
sentiment_pipeline = pipeline("sentiment-analysis")
|
| 13 |
+
|
| 14 |
+
def get_video_comments(video_id, max_length=512):
    """Collect the top-level comments of a YouTube video.

    Pages through the ``commentThreads().list`` endpoint of the module-level
    ``youtube`` client until the response no longer carries a
    ``nextPageToken``.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_length: Maximum number of characters kept per comment; longer
            comments are cut to this length.

    Returns:
        A list of comment strings, in the order the API returned them.
    """
    comments = []
    next_page_token = None

    while True:
        response = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            # Largest page size the endpoint allows; fewer round trips and
            # less quota spent than with the default page size.
            maxResults=100,
            # Plain text avoids slicing through HTML tags/entities below and
            # keeps markup out of the sentiment model's input.
            textFormat='plainText',
            # None on the first request: the client omits unset parameters,
            # so no empty pageToken is sent.
            pageToken=next_page_token,
        ).execute()

        # Truncate each comment to max_length characters while collecting.
        comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay'][:max_length]
            for item in response.get('items', [])
        )

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments
|
| 37 |
+
|
| 38 |
+
def analyze_sentiment(comments):
    """Run the module-level sentiment pipeline over a batch of comments.

    Args:
        comments: List of comment strings. May be empty or ``None``.

    Returns:
        The pipeline's result list (one entry per comment; the caller reads
        the ``'label'`` and ``'score'`` keys of each entry), or an empty list
        when there is nothing to analyze.
    """
    # Guard clause: skip the model call entirely when there are no comments.
    if not comments:
        return []

    # Character-level truncation done upstream does not bound the number of
    # tokens, so ask the tokenizer to truncate to the model's maximum length
    # instead of letting an over-long comment raise inside the model.
    return sentiment_pipeline(comments, truncation=True)
|
| 44 |
+
|
| 45 |
+
def _extract_video_id(yt_link):
    """Return the video ID contained in a YouTube link.

    Handles ``watch?v=<id>`` URLs (with any additional query parameters),
    ``youtu.be/<id>`` short links and ``/shorts/<id>``, ``/embed/<id>``,
    ``/live/<id>`` and ``/v/<id>`` paths. Anything else falls back to the text
    after the last ``=``, so a bare video ID is returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    # The input box is multi-line, so drop stray spaces and newlines first.
    link = yt_link.strip()

    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(link if '://' in link else 'https://' + link)

    ids_in_query = parse_qs(parsed.query).get('v')
    if ids_in_query:
        return ids_in_query[0]

    segments = [segment for segment in parsed.path.split('/') if segment]
    if parsed.netloc.lower().endswith('youtu.be') and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
        return segments[1]

    return link.split("=")[-1]


def process_video(yt_link):
    """Fetch a video's comments and tabulate their sentiment.

    Args:
        yt_link: YouTube video URL (or bare video ID) entered by the user.

    Returns:
        A pandas DataFrame with the columns ``Comments``, ``Sentiment`` and
        ``Score``, one row per comment.
    """
    video_id = _extract_video_id(yt_link)
    comments = get_video_comments(video_id)
    sentiment_results = analyze_sentiment(comments)

    # Create a DataFrame from the comments and sentiment analysis results
    df = pd.DataFrame({
        'Comments': comments,
        'Sentiment': [result['label'] for result in sentiment_results],
        'Score': [result['score'] for result in sentiment_results],
    })

    return df
|
| 58 |
+
|
| 59 |
+
# Gradio UI: a two-line text box for the video URL goes in, and the DataFrame
# returned by process_video comes out as a table.
_url_box = gr.Textbox(lines=2, placeholder="Enter YouTube video URL here...")

iface = gr.Interface(
    title="YouTube Video Comments Sentiment Analysis",
    description="Enter a YouTube video link to analyze the sentiment of its comments.",
    fn=process_video,
    inputs=_url_box,
    outputs="dataframe",
)

# Start serving the app, requesting a public share link.
iface.launch(share=True)
|
youtube_comments_sentiment.csv
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Comments,Sentiment,Score
|
| 2 |
+
❤❤❤❤❤❤❤❤,NEGATIVE,0.6970567107200623
|
| 3 |
+
"Che bellezza, è un amore❤❤❤❤",POSITIVE,0.7507229447364807
|
| 4 |
+
couch insurance (what’s a scratching post ?),NEGATIVE,0.998101532459259
|
| 5 |
+
"Forps, you can bite my hand anytime you want to. Okay? Love you, Forps.",POSITIVE,0.9997705817222595
|
| 6 |
+
The insurance will be void because you actually have a squirrel.,NEGATIVE,0.9991538524627686
|
| 7 |
+
<Announcer Voice> Forps wins!,POSITIVE,0.9996067881584167
|
| 8 |
+
Reason for insurance: Forps the squirrel-raccoon!,NEGATIVE,0.975786566734314
|
| 9 |
+
Forps with the wide-eyed stare!,POSITIVE,0.9920353293418884
|
| 10 |
+
That love bite ❤,POSITIVE,0.9913235902786255
|
| 11 |
+
Forps is fierce.,POSITIVE,0.9994118213653564
|
| 12 |
+
😍🥰😘🐾🐈⬛🥰😍forps,NEGATIVE,0.6970567107200623
|
| 13 |
+
"Forps is too big, this is scary more than cute now 😮 still very cute 😅",POSITIVE,0.946954071521759
|
| 14 |
+
Flooficus Majesticus,POSITIVE,0.9862658381462097
|
| 15 |
+
😂👍,NEGATIVE,0.6970567107200623
|
| 16 |
+
schöne Katze,NEGATIVE,0.9197412133216858
|
| 17 |
+
naughty:3,NEGATIVE,0.6425185203552246
|
| 18 |
+
The last stop punch tho 😅,NEGATIVE,0.9785932302474976
|
| 19 |
+
"Sassy little guy, isn’t he?",POSITIVE,0.9434502720832825
|