mkbackup committed on
Commit
7ff2308
·
1 Parent(s): ac521d2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
# Runtime dependency bootstrap: these shell commands run, in order, before the
# third-party imports further down the file. Exit statuses are intentionally
# not inspected, so a failing command does not stop the script.
for _pip_command in (
    "python3 -m pip install --upgrade pip",
    "pip install httpx==0.24.1",
    "pip uninstall -y gradio",
    "pip install gradio==3.1.4",
):
    os.system(_pip_command)
8
+
9
+ import gradio as gr
10
+ import hopsworks
11
+ import joblib
12
+ import pandas as pd
13
+ from googleapiclient.discovery import build
14
+ import re
15
+
16
def _require_env(name):
    """Return the value of environment variable *name* or fail with a clear error."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; configure it as a secret "
            "for this deployment."
        )
    return value


# SECURITY: API credentials must never be committed to source control. They are
# read from the environment instead (for example, secrets configured on the
# hosting platform). Any key that was previously committed in this file should
# be treated as leaked and revoked/rotated.
hopsworks_key = _require_env("HOPSWORKS_API_KEY")

# YouTube Data API v3 client used to fetch comment threads.
youtube = build(
    "youtube",
    "v3",
    developerKey=_require_env("YOUTUBE_API_KEY"),
)

# Log in to Hopsworks and obtain handles to the feature store and model registry.
project = hopsworks.login(api_key_value=hopsworks_key)
fs = project.get_feature_store()

mr = project.get_model_registry()

# `model` first refers to the registry entry; after the artifacts are
# downloaded it is rebound to the deserialized estimator.
model = mr.get_model("comments_model", version=1)
model_dir = model.download()

# NOTE(review): joblib.load unpickles arbitrary objects; this is only safe as
# long as the model registry contents are trusted.
model = joblib.load(os.path.join(model_dir, "comments_model.pkl"))
vectorizer = joblib.load(os.path.join(model_dir, "vectorizer.pkl"))
print("Model downloaded")
34
+
35
# Compiled once at import time. Every fragment is a raw string so that regex
# escapes such as ``\.`` are not interpreted as (invalid) Python string escapes.
_VIDEO_ID_PATTERN = re.compile(
    r'(?:https?://)?(?:www\.)?'
    r'(?:youtube\.com/.*?[?&]v=|youtu\.be/|youtube\.com/embed/|youtube\.com/v/|youtube\.com/e/|youtube\.com/user/[^/]+/u/0/|www\.youtube\.com/user/[^/]+/u/0/|youtube\.com/s[^/]+/|www\.youtube\.com/s[^/]+/|youtube\.com/channel/|youtube\.com/c/|youtube\.com/user/[^/]+/|youtube\.com/user/[^/]+/live/|twitch\.tv/)'
    r'([^"&?/ ]{11})'
)


def get_video_id(video_link):
    """Extract the 11-character video ID from a video URL.

    Args:
        video_link: URL text to search, e.g. ``https://youtu.be/<id>`` or
            ``https://www.youtube.com/watch?v=<id>``.

    Returns:
        The 11-character ID that follows one of the recognised URL prefixes,
        or ``None`` when nothing matches or *video_link* is not a string.

    NOTE(review): the prefix list also accepts channel/user paths and
    ``twitch.tv/``, where the captured 11 characters are unlikely to be a
    real YouTube video ID. The pattern is kept as-is to preserve behavior.
    """
    # A missing or non-text value cannot contain a link; report "no match"
    # instead of letting the regex engine raise TypeError.
    if not isinstance(video_link, str):
        return None

    match = _VIDEO_ID_PATTERN.search(video_link)
    return match.group(1) if match else None
49
+
50
+
51
def _clean_comment(text):
    """Return *text* with everything except alphanumerics and whitespace removed."""
    return "".join(ch for ch in text if ch.isalnum() or ch.isspace())


def sentiment(video_link):
    """Classify the top-level comments of a YouTube video.

    Fetches up to 100 comment threads for the video referenced by
    *video_link*, strips each comment down to alphanumeric and whitespace
    characters, vectorizes the text and runs the loaded model on it.

    Args:
        video_link: URL of the YouTube video.

    Returns:
        A 4-tuple ``(positive_count, negative_count, positive_percentage,
        negative_percentage)``. The counts are ints (predictions ``> 0`` and
        ``< 0`` respectively); the percentages are strings formatted like
        ``"12.34%"``. When the video has no comments, all values are zero.

    Raises:
        ValueError: If no video ID can be extracted from *video_link*.
    """
    print("Calling function")
    video_id = get_video_id(video_link)
    if video_id is None:
        # Fail early with a readable message instead of sending an empty
        # videoId to the API.
        raise ValueError(f"Could not find a YouTube video ID in: {video_link!r}")

    response = (
        youtube.commentThreads()
        .list(part="snippet", videoId=video_id, maxResults=100)
        .execute()
    )

    comments = [
        _clean_comment(item["snippet"]["topLevelComment"]["snippet"]["textDisplay"])
        for item in response.get("items", [])
    ]

    df = pd.DataFrame({"comment": comments})
    df = df.dropna(subset=["comment"])
    if df.empty:
        # Nothing to classify: avoid predicting on zero samples and the
        # division by zero that would follow.
        return 0, 0, "0.00%", "0.00%"

    comments_features = vectorizer.transform(df["comment"])
    predictions = model.predict(comments_features)

    # Cast to plain ints so the UI receives native Python numbers.
    positive_count = int(sum(predictions > 0))
    negative_count = int(sum(predictions < 0))
    total_count = len(predictions)
    positive_percentage = (positive_count / total_count) * 100
    negative_percentage = (negative_count / total_count) * 100
    return (
        positive_count,
        negative_count,
        f"{positive_percentage:.2f}%",
        f"{negative_percentage:.2f}%",
    )
77
+
78
# Single text input that receives the video URL.
# NOTE(review): confirm that the pinned Gradio version accepts the `type` and
# `variable` keywords on Textbox.
link_input = gr.Textbox(type="text", label="input YouTube video link", variable="video_link")

# Output widgets, in the same order as the tuple returned by `sentiment`.
# NOTE(review): confirm that the pinned Gradio version accepts the `default`
# and `name` keywords on these components.
result_outputs = [
    gr.Number(label="The number of positive comments", default=0),
    gr.Number(label="The number of negative comments", default=0),
    gr.Textbox(label="Percentage of positive comments", name="positive_percentage"),
    gr.Textbox(label="Percentage of negative comments", name="negative_percentage"),
]

# Wire the prediction function to the UI; flagging is turned off.
demo = gr.Interface(
    fn=sentiment,
    title="YouTube comment sentiment analysis",
    description="Experiment with YouTube comments to predict the YouTube video sentiments.",
    allow_flagging="never",
    inputs=link_input,
    outputs=result_outputs,
)

# Start the app with debug output and a public share link.
demo.launch(debug=True, share=True)
93
+