Spaces:

Abu1998
/

Youtube_Likes_Extractor

Build error

App Files Files Community

Abu1998 committed on Dec 9, 2024

Commit

3f279da

verified ·

1 Parent(s): 9b3cfd9

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -93

app.py CHANGED Viewed

@@ -1,93 +1,126 @@
-import gradio as gr
-import requests
-import csv
-import re
-from datetime import timedelta
-# YouTube API key placeholder and the v3 `videos` endpoint that fetch_video_data sends its requests to
-API_KEY = 'YOUR_YOUTUBE_API_KEY'  # Replace with your API key
-YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3/videos'
-def parse_duration(duration):
-    """
-    Parse ISO 8601 duration string to seconds.
-    Example: "PT1H10M5S" -> 4205 seconds
-    """
-    regex = re.match(r"PT(\d+H)?(\d+M)?(\d+S)?", duration)
-    hours = int(regex.group(1)[:-1] if regex.group(1) else 0)
-    minutes = int(regex.group(2)[:-1] if regex.group(2) else 0)
-    seconds = int(regex.group(3)[:-1] if regex.group(3) else 0)
-    return hours * 3600 + minutes * 60 + seconds
-def fetch_video_data(video_ids):
-    """
-    Fetch video details (title, duration) from YouTube API.
-    """
-    video_data = []
-    total_time_spent = 0  # Total time spent in seconds
-    for video_id in video_ids:
-        params = {
-            'part': 'contentDetails,snippet',
-            'id': video_id,
-            'key': API_KEY
-        }
-        response = requests.get(YOUTUBE_API_URL, params=params)
-        data = response.json()
-        if 'items' in data:
-            item = data['items'][0]
-            title = item['snippet']['title']
-            duration = item['contentDetails']['duration']
-            video_duration_seconds = parse_duration(duration)
-            total_time_spent += video_duration_seconds
-            video_data.append({
-                'Video ID': video_id,
-                'Title': title,
-                'Duration (seconds)': video_duration_seconds
-            })
-    # Convert total time spent to hours and minutes
-    total_time_spent_in_hours = str(timedelta(seconds=total_time_spent))
-    # Save data to CSV
-    filename = "videos_time_spent.csv"
-    with open(filename, mode="w", newline="", encoding="utf-8") as file:
-        writer = csv.DictWriter(file, fieldnames=["Video ID", "Title", "Duration (seconds)"])
-        writer.writeheader()
-        writer.writerows(video_data)
-    return filename, total_time_spent_in_hours
-def gradio_interface(video_ids):
-    """
-    Gradio interface function to process video IDs and generate time statistics.
-    """
-    video_ids_list = video_ids.split(",")  # Convert input string to list
-    csv_file, total_time = fetch_video_data(video_ids_list)
-    return f"Total time spent: {total_time} on these videos. Download the CSV below.", csv_file
-# Gradio App setup: heading text, then three rows (ID input, submit button, result text + CSV download)
-with gr.Blocks() as demo:
-    gr.Markdown("### Time Spent on YouTube Videos")
-    gr.Markdown("Enter a list of YouTube video IDs (comma separated) to calculate the time spent on each video.")
-    with gr.Row():
-        video_ids_input = gr.Textbox(label="Enter Video IDs", placeholder="e.g., dQw4w9WgXcQ, kJQP7kiw5Fk")
-    with gr.Row():
-        submit_button = gr.Button("Calculate Time Spent")
-    with gr.Row():
-        message_output = gr.Textbox(label="Result", interactive=False)
-        download_link = gr.File(label="Download CSV")
-    submit_button.click(  # gradio_interface returns (message, csv path), matching the two outputs below
-        fn=gradio_interface,
-        inputs=[video_ids_input],
-        outputs=[message_output, download_link]
-    )
-# Launch the app
-demo.launch()

+import streamlit as st
+import pandas as pd
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+import time
+# Streamlit UI
+st.title("YouTube Video Comments Extractor")
+# User input for API key, search query and number of videos
+api_key = st.text_input("Enter your YouTube API Key", type="password")  # password-type input for the key
+search_query = st.text_input("Enter the Search Query (e.g., MrBeast)")
+num_videos = st.number_input("Number of Videos to Scrape", min_value=1, max_value=50, step=1)  # handed to search_videos as max_results
+# Function to search for videos based on a query
+def search_videos(api_key, query, max_results):
+    youtube = build('youtube', 'v3', developerKey=api_key)
+    response = youtube.search().list(
+        part='snippet',
+        q=query,
+        type='video',
+        maxResults=max_results
+    ).execute()
+    videos = []
+    for item in response['items']:
+        videos.append({
+            'video_id': item['id']['videoId'],
+            'title': item['snippet']['title']
+        })
+    return videos
+# Function to extract comments from a video
+def get_video_comments(api_key, video_id):
+    youtube = build('youtube', 'v3', developerKey=api_key)
+    comments = []
+    next_page_token = None
+    while True:
+        try:
+            response = youtube.commentThreads().list(
+                part='snippet,replies',
+                videoId=video_id,
+                maxResults=100,
+                pageToken=next_page_token
+            ).execute()
+            for item in response['items']:
+                comment = item['snippet']['topLevelComment']['snippet']
+                comments.append({
+                    'VideoID': video_id,
+                    'Channel': comment.get('authorChannelUrl', ''),
+                    'CommentedDateTime': comment['publishedAt'],
+                    'NumOfCommentlikes': comment['likeCount'],
+                    'Comment': comment['textDisplay'],
+                    'CommentedUserID': comment['authorChannelId']['value']
+                })
+                # Handle replies (if any)
+                if 'replies' in item:
+                    for reply in item['replies']['comments']:
+                        reply_snippet = reply['snippet']
+                        comments.append({
+                            'VideoID': video_id,
+                            'Channel': reply_snippet.get('authorChannelUrl', ''),
+                            'CommentedDateTime': reply_snippet['publishedAt'],
+                            'NumOfCommentlikes': reply_snippet['likeCount'],
+                            'Comment': reply_snippet['textDisplay'],
+                            'CommentedUserID': reply_snippet['authorChannelId']['value']
+                        })
+            next_page_token = response.get('nextPageToken')
+            if not next_page_token:
+                break
+        except HttpError as e:
+            st.error(f"An error occurred while fetching comments: {e}")
+            break
+    return comments
+# Action button
+if st.button("Start Scraping"):
+    if api_key and search_query and num_videos:
+        st.text("Searching for videos...")
+        videos = search_videos(api_key, search_query, num_videos)
+        if not videos:
+            st.warning("No videos found for the given query.")
+        else:
+            st.text(f"Found {len(videos)} videos. Extracting comments...")
+            all_comments = []
+            progress_bar = st.progress(0)
+            for idx, video in enumerate(videos):
+                video_id = video['video_id']
+                st.text(f"Fetching comments for video: {video['title']} (ID: {video_id})")
+                video_comments = get_video_comments(api_key, video_id)
+                all_comments.extend(video_comments)
+                # Update progress bar
+                progress = (idx + 1) / len(videos)
+                progress_bar.progress(progress)
+            # Save all comments to a CSV file
+            if all_comments:
+                df_comments = pd.DataFrame(all_comments)
+                csv_file = f"{search_query.replace(' ', '_')}_comments.csv"
+                df_comments.to_csv(csv_file, index=False)
+                st.success(f"Comments extracted and saved to {csv_file}")
+                # Show dataframe details
+                st.write("First 5 rows of the extracted comments:")
+                st.dataframe(df_comments.head())
+                st.download_button(
+                    label="Download CSV",
+                    data=df_comments.to_csv(index=False),
+                    file_name=csv_file,
+                    mime='text/csv'
+                )
+            else:
+                st.warning("No comments found for the selected videos.")
+    else:
+        st.warning("Please enter your API key, search query, and number of videos.")