clementBE committed on
Commit
6f1cbb7
·
verified ·
1 Parent(s): 23c508f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -164
app.py CHANGED
@@ -1,182 +1,74 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from yt_dlp import YoutubeDL
4
- from yt_dlp.utils import DownloadError
5
- from datetime import datetime
6
- from pathlib import Path
7
- import plotly.express as px
8
- import logging
9
-
10
- logging.basicConfig(level=logging.INFO)
11
-
12
- def _extract_thumbnail_info(video, url, collection_date):
13
- """
14
- Extracts relevant thumbnail metadata from a yt-dlp entry.
15
- """
16
- upload_date_raw = video.get('upload_date') or video.get('release_date') or video.get('timestamp')
17
- upload_date = pd.to_datetime(upload_date_raw, unit='s', errors='coerce')
18
- if isinstance(upload_date_raw, str) and len(upload_date_raw) == 8 and upload_date_raw.isdigit():
19
- upload_date = pd.to_datetime(upload_date_raw, format='%Y%m%d', errors='coerce')
20
-
21
- return {
22
- 'collection_date': collection_date,
23
- 'profile_url': url,
24
- 'id': video.get('id'),
25
- 'title': video.get('title'),
26
- 'upload_date': upload_date,
27
- 'uploader': video.get('uploader'),
28
- 'thumbnail': video.get('thumbnail'),
29
- 'video_url': video.get('webpage_url') or video.get('url'),
30
- }
31
-
32
- def collect_thumbnails(profiles_text, profiles_file, cookies_file, max_videos=20, progress=gr.Progress()):
33
- """
34
- Collects thumbnail URLs and metadata from TikTok profiles using yt-dlp.
35
- """
36
- urls = []
37
- if profiles_text:
38
- urls.extend([line.strip() for line in profiles_text.splitlines() if line.strip()])
39
- if profiles_file is not None:
40
- try:
41
- file_content = Path(profiles_file.name).read_text(encoding='utf-8')
42
- urls.extend([line.strip() for line in file_content.splitlines() if line.strip()])
43
- except Exception as e:
44
- return None, None, None, f"❌ Failed to read file: {e}"
45
-
46
- if not urls:
47
- return None, None, None, "❌ Please provide at least one TikTok profile URL."
48
-
49
- collection_date = datetime.today().date()
50
- all_entries = []
51
-
52
  ydl_opts = {
53
- 'quiet': True,
54
- 'skip_download': True,
55
- 'extract_flat': False,
56
- 'http_headers': {
57
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
58
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
59
- 'Chrome/114.0.0.0 Safari/537.36',
60
- },
61
  }
62
- if cookies_file is not None:
63
- ydl_opts['cookiefile'] = cookies_file.name
64
-
65
- with YoutubeDL(ydl_opts) as ydl:
66
- for i, url in enumerate(progress.tqdm(urls, desc="Processing Profiles")):
67
- try:
68
- info = ydl.extract_info(url, download=False)
69
- entries = info.get('entries', [])
70
 
71
- if entries:
72
- for video in entries[:max_videos]:
73
- if video:
74
- all_entries.append(_extract_thumbnail_info(video, url, collection_date))
75
- elif info.get('_type') != 'playlist':
76
- all_entries.append(_extract_thumbnail_info(info, url, collection_date))
77
- else:
78
- return None, None, None, f"⚠️ No videos found or profile is private for URL: {url}"
79
 
80
- except DownloadError as e:
81
- return None, None, None, f"❌ A network error occurred while processing {url}: {e}"
82
- except Exception as e:
83
- logging.error(f"Error processing {url}: {e}")
84
- return None, None, None, f"❌ An unexpected error occurred with {url}: {e}"
85
 
86
- if not all_entries:
87
- return None, None, None, "❌ No thumbnails found."
88
 
89
- df_thumbs = pd.DataFrame(all_entries)
90
- output_file = "tiktok_thumbnails.xlsx"
91
- df_thumbs.to_excel(output_file, index=False)
92
 
93
- # Collect thumbnail URLs for preview
94
- thumb_urls = df_thumbs['thumbnail'].dropna().tolist()
95
 
96
- return output_file, thumb_urls, len(df_thumbs), f"βœ… Collected {len(df_thumbs)} thumbnails."
 
 
 
97
 
98
- def generate_plot(output_file):
99
- """
100
- Reads the data from the output file and generates a Plotly graph.
101
- """
102
- if not output_file:
103
- return None, "⚠️ No data file to plot."
104
-
105
- try:
106
- df = pd.read_excel(output_file)
107
- df['upload_date'] = pd.to_datetime(df['upload_date'], errors='coerce')
108
- df = df.dropna(subset=['upload_date', 'uploader'])
109
- df['upload_month'] = df['upload_date'].dt.to_period('M').dt.to_timestamp()
110
 
111
- grouped = df.groupby(['upload_month', 'uploader']).size().reset_index(name='thumbnail_count')
112
-
113
- fig = px.line(
114
- grouped,
115
- x="upload_month",
116
- y="thumbnail_count",
117
- color="uploader",
118
- markers=True,
119
- title="Number of Thumbnails Over Time by Uploader"
120
- )
121
- fig.update_layout(
122
- xaxis_title="Upload Month",
123
- yaxis_title="Number of Thumbnails",
124
- legend_title="Uploader",
125
- hovermode="x unified"
126
- )
127
- return fig, "βœ… Plot generated successfully!"
128
- except Exception as e:
129
- return None, f"⚠️ Failed to generate plot. Ensure the data file is valid: {e}"
130
-
131
- def main_handler(profiles_text, profiles_file, cookies_file, max_videos):
132
- """
133
- Main function to handle the entire workflow and return Gradio outputs.
134
- """
135
- # Step 1: Collect thumbnails
136
- output_file, thumb_urls, count, collect_msg = collect_thumbnails(
137
- profiles_text, profiles_file, cookies_file, max_videos=max_videos
138
  )
139
- yield output_file, None, thumb_urls, collect_msg
140
 
141
- # Step 2: Generate plot if collection was successful
142
- if output_file:
143
- plot, plot_msg = generate_plot(output_file)
144
- yield output_file, plot, thumb_urls, plot_msg
145
- else:
146
- yield None, None, None, collect_msg
147
 
148
- # Gradio UI
149
- with gr.Blocks() as demo:
150
- gr.Markdown("""
151
- # πŸ–ΌοΈ TikTok Profile Thumbnail Collector
152
- **Instructions:**
153
- - Paste TikTok profile URLs or upload a `.txt` file with one URL per line.
154
- - You can upload a `cookies.txt` file to access private profiles.
155
- - Adjust the slider to limit the number of thumbnails per profile.
156
- - After collection, download the Excel file, preview thumbnails, and view activity over time.
157
- ---
158
- """)
159
 
160
- with gr.Row():
161
- profiles_text = gr.Textbox(label="TikTok profile URLs (one per line)", lines=6, placeholder="https://www.tiktok.com/@username")
162
- profiles_file = gr.File(label="Or upload a .txt file", file_types=['.txt'])
163
 
164
- cookies_file = gr.File(label="Upload TikTok cookies.txt (optional)", file_types=['.txt'])
165
- max_videos_slider = gr.Slider(minimum=1, maximum=50, step=1, value=20, label="Max Videos per Profile")
166
-
167
- btn = gr.Button("Collect & Analyze Thumbnails")
168
-
169
- with gr.Row():
170
- output_file_display = gr.File(label="Download Excel File")
171
- plot_output = gr.Plot(label="Thumbnail Activity Plot")
172
-
173
- thumbs_gallery = gr.Gallery(label="Thumbnails Preview", columns=5, height="auto")
174
- msg = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
175
 
176
- btn.click(
177
- fn=main_handler,
178
- inputs=[profiles_text, profiles_file, cookies_file, max_videos_slider],
179
- outputs=[output_file_display, plot_output, thumbs_gallery, msg]
180
- )
181
-
182
- demo.launch()
 
1
  import gradio as gr
 
2
  from yt_dlp import YoutubeDL
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+
7
+ # -----------------------
8
+ # Helper Functions
9
+ # -----------------------
10
def download_tiktok_thumbnails(username_url, cookies_file=None, max_videos=20):
    """Download thumbnail images for a TikTok account into a fresh temp directory.

    Args:
        username_url: URL handed to yt-dlp (expected to be a TikTok account page).
        cookies_file: Optional cookies file. Either an object exposing the path
            as ``.name`` or a plain path string is accepted.
        max_videos: Upper bound on the number of thumbnails to fetch. Coerced
            to ``int`` because UI sliders may deliver a float.

    Returns:
        A ``(status, thumbs, tmp_dir)`` tuple. On success ``thumbs`` is a sorted
        list of image paths inside ``tmp_dir``. On failure, or when nothing was
        found, ``thumbs`` and ``tmp_dir`` are both ``None``.
    """
    limit = int(max_videos)
    if limit < 1:
        # Nothing was requested: skip the network round-trip and avoid
        # creating a temp directory that nobody would clean up.
        return "❌ No thumbnails found", None, None

    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        "skip_download": True,
        # The YoutubeDL API key is "writethumbnail"; "write_thumbnail" is not
        # a documented option, so no image would be written with it.
        "writethumbnail": True,
        # Stop after `limit` playlist entries instead of fetching the whole
        # profile and discarding the surplus afterwards.
        "playlistend": limit,
        "outtmpl": os.path.join(tmp_dir, "%(id)s.%(ext)s"),
        "quiet": True,
    }
    if cookies_file:
        # Accept both a file-like wrapper (path in `.name`) and a bare path.
        ydl_opts["cookiefile"] = getattr(cookies_file, "name", cookies_file)

    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([username_url])
    except Exception as e:
        # UI boundary: report the failure as a status string, and do not leak
        # the temp directory.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return f"❌ Error: {e}", None, None

    # Collect all thumbnails; sorted because os.listdir order is arbitrary.
    image_exts = (".jpg", ".jpeg", ".webp", ".png")
    thumbs = [
        os.path.join(tmp_dir, name)
        for name in sorted(os.listdir(tmp_dir))
        if name.lower().endswith(image_exts)
    ][:limit]

    if not thumbs:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return "❌ No thumbnails found", None, None

    return f"✅ Fetched {len(thumbs)} thumbnails", thumbs, tmp_dir
 
38
 
39
def prepare_zip(tmp_dir):
    """Pack the contents of ``tmp_dir`` into a ZIP archive and return its path.

    Each call writes to its own freshly created directory, so simultaneous
    requests cannot overwrite one another's ``tiktok_thumbnails.zip``.

    Args:
        tmp_dir: Directory whose contents are archived.

    Returns:
        Path of the created ``tiktok_thumbnails.zip`` file.
    """
    out_dir = tempfile.mkdtemp(prefix="tiktok_zip_")
    # make_archive appends the ".zip" suffix itself and returns the final
    # path, so no string surgery on the file name is needed.
    base_name = os.path.join(out_dir, "tiktok_thumbnails")
    return shutil.make_archive(base_name, "zip", tmp_dir)
43
 
44
def fetch_and_zip_tiktok(username_url, cookies_file, max_videos):
    """Fetch thumbnails for an account and bundle them into a ZIP.

    Args:
        username_url: Account URL typed by the user; surrounding whitespace is
            ignored.
        cookies_file: Optional cookies file, forwarded unchanged to
            ``download_tiktok_thumbnails``.
        max_videos: Maximum number of thumbnails to fetch.

    Returns:
        A ``(status, thumbs, zip_file)`` tuple in the order expected by the
        UI outputs. ``thumbs`` and ``zip_file`` are ``None`` when nothing was
        fetched.
    """
    url = (username_url or "").strip()
    if not url:
        # Give a clear message instead of passing an empty URL to the
        # downloader and surfacing its error.
        return "❌ Please enter a TikTok account URL", None, None

    status, thumbs, tmp_dir = download_tiktok_thumbnails(url, cookies_file, max_videos)
    # Only build an archive when there is something to put in it.
    zip_file = prepare_zip(tmp_dir) if thumbs else None
    return status, thumbs, zip_file
 
 
 
 
 
 
 
 
48
 
49
+ # -----------------------
50
+ # Gradio Interface
51
+ # -----------------------
52
with gr.Blocks() as demo:
    # Page header and usage notes.
    gr.Markdown("## 🎵 TikTok Account Thumbnails Downloader with Cookies")
    gr.Markdown(
        "Upload your `cookies.txt` (exported from your browser) first, then enter a TikTok account URL.\n"
        "Thumbnails from private or age-restricted videos may require cookies."
    )

    # Inputs: optional cookies file, account URL and the per-run limit.
    cookies_upload = gr.File(label="Upload cookies.txt (required for private content)", file_types=[".txt"])
    url_input = gr.Textbox(label="TikTok Account URL", placeholder="https://www.tiktok.com/@username")
    max_videos_slider = gr.Slider(minimum=1, maximum=50, step=1, value=20, label="Max Videos to Fetch")
    fetch_btn = gr.Button("📥 Fetch Thumbnails")

    # Outputs: status line, image preview and the downloadable archive.
    status_output = gr.Textbox(label="Status")
    thumbs_gallery = gr.Gallery(label="Thumbnails Preview", elem_id="thumbs_gallery", columns=5, height="auto")
    download_btn = gr.File(label="Download All Thumbnails (ZIP)")

    # The list order must match fetch_and_zip_tiktok's parameters and its
    # returned (status, thumbs, zip_file) tuple.
    inputs_list = [url_input, cookies_upload, max_videos_slider]
    outputs_list = [status_output, thumbs_gallery, download_btn]

    fetch_btn.click(fetch_and_zip_tiktok, inputs=inputs_list, outputs=outputs_list)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()