jebin2 commited on
Commit
d7f11fd
·
1 Parent(s): 4a8a9de

setup 2n for infloxa

Browse files
src/asset_selector.py CHANGED
@@ -31,7 +31,7 @@ class AssetSelector:
31
  """Load video library from specific CSV file"""
32
  try:
33
  if os.getenv("INFLOXA", "false").lower() == "true":
34
- csv_filename = "video_analyser/infloxa_video_analysis.csv"
35
  elif os.getenv("USE_VEO", "false").lower() == "true":
36
  csv_filename = "data/somira_video_library_veo.csv"
37
  else:
@@ -95,9 +95,23 @@ class AssetSelector:
95
  audios.remove("https://storage.googleapis.com/somira/ssstik.io_1762269951926.mp3")
96
 
97
  if os.getenv("INFLOXA", "false").lower() == "true":
98
- audios = ["testData/infloxa/screenrecording.mp3"]
99
  return audios
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  async def select_videos(self, tts_script, timed_transcript, max_duration: int = 12) -> Tuple[List[Dict], str]:
102
  """Select videos using AI analysis of TTS script"""
103
  try:
 
31
  """Load video library from specific CSV file"""
32
  try:
33
  if os.getenv("INFLOXA", "false").lower() == "true":
34
+ csv_filename = "data/infloxa_video_library100.csv"
35
  elif os.getenv("USE_VEO", "false").lower() == "true":
36
  csv_filename = "data/somira_video_library_veo.csv"
37
  else:
 
95
  audios.remove("https://storage.googleapis.com/somira/ssstik.io_1762269951926.mp3")
96
 
97
  if os.getenv("INFLOXA", "false").lower() == "true":
98
+ audios = ["testData/infloxa/audiopulse.mp3"]
99
  return audios
100
 
101
def audio_beats_map(self, audio_path: Optional[str] = None) -> Optional[Union[List[float], Dict[str, List[float]]]]:
    """Return precomputed beat timestamps for a known local audio file.

    Args:
        audio_path: Local path of the audio file. When omitted (None),
            the whole path -> beats mapping is returned, so callers may
            also do ``audio_beats_map().get(path)``.

    Returns:
        The list of beat times in seconds for ``audio_path``, the full
        mapping when ``audio_path`` is None, or None when the path is
        unknown or an error occurs.
    """
    try:
        # Hard-coded beat maps for curated tracks — avoids re-running
        # beat detection on every render.
        audio_map: Dict[str, List[float]] = {
            "testData/infloxa/audiopulse.mp3": [1.01, 1.17, 2.24, 4.06, 5.14, 6.21, 8.03, 9.11],
        }
        if audio_path is None:
            # Generalization: no argument -> expose the whole mapping
            # (matches the no-arg call site in automation.py, which would
            # otherwise raise TypeError on a required positional arg).
            return audio_map
        if audio_path in audio_map:
            return audio_map[audio_path]
    except Exception as e:
        logger.error(f"Failed to compute audio beats map for {audio_path}: {e}")

    return None
115
  async def select_videos(self, tts_script, timed_transcript, max_duration: int = 12) -> Tuple[List[Dict], str]:
116
  """Select videos using AI analysis of TTS script"""
117
  try:
src/automation.py CHANGED
@@ -199,13 +199,23 @@ class ContentAutomation:
199
 
200
  music_duration = 10
201
  if os.getenv("INFLOXA", "false").lower() == "true":
202
- music_duration = 23
203
 
204
  beat_times = None
205
  try_next = False
206
 
207
  while beat_times is None:
208
  await self._download_bg_music(try_next)
 
 
 
 
 
 
 
 
 
 
209
  # Get ALL beats (no filtering yet - we'll handle min duration in render_random_video)
210
  beat_times, method_used = utils.get_best_beat_method(
211
  self.data_holder.visual_assets["background_music_local"],
@@ -238,7 +248,7 @@ class ContentAutomation:
238
  if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "true":
239
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
240
  video_no_audio_path = await self.video_renderer.render_interval_video(
241
- 0.3,
242
  music_duration
243
  )
244
  else:
@@ -316,10 +326,10 @@ class ContentAutomation:
316
  await self._download_to_local(
317
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
318
  )
319
- if os.getenv("INFLOXA", "false").lower() == "true":
320
- output_path = "/tmp/repeated_bg_music.mp3"
321
- output_path = utils.repeat_audio_ffmpeg(self.data_holder.visual_assets["background_music_local"], output_path, 5)
322
- self.data_holder.visual_assets["background_music_local"] = output_path
323
 
324
  async def create_audio(self):
325
  try_again = False
@@ -372,6 +382,8 @@ class ContentAutomation:
372
  from video_downloader import VideoDownloader
373
  download_path="testData/infloxa"
374
 
 
 
375
  self.data_holder.visual_assets["all_videos"] = [
376
  {
377
  "url": row.get("video_filename", "").strip(),
@@ -381,7 +393,7 @@ class ContentAutomation:
381
  )
382
  }
383
  for _, row in self.asset_selector.video_library.iterrows()
384
- if row.get("video_filename", "").strip() and row.get("final_selection_score", 0) > 80
385
  ]
386
  else:
387
  self.data_holder.visual_assets["all_videos"] = [
 
199
 
200
  music_duration = 10
201
  if os.getenv("INFLOXA", "false").lower() == "true":
202
+ music_duration = 15
203
 
204
  beat_times = None
205
  try_next = False
206
 
207
  while beat_times is None:
208
  await self._download_bg_music(try_next)
209
+ with AudioFileClip(self.data_holder.visual_assets["background_music_local"]) as audio_clip:
210
+ music_duration = audio_clip.duration - 0.5
211
+
212
+
213
+ if self.asset_selector.audio_beats_map().get(self.data_holder.visual_assets.get("background_music_url", ""), None):
214
+ beat_times = self.asset_selector.audio_beats_map()[self.data_holder.visual_assets.get("background_music_url", "")]
215
+ method_used = "cached"
216
+ logger.info("Using cached beat times.")
217
+ break
218
+
219
  # Get ALL beats (no filtering yet - we'll handle min duration in render_random_video)
220
  beat_times, method_used = utils.get_best_beat_method(
221
  self.data_holder.visual_assets["background_music_local"],
 
248
  if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "true":
249
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
250
  video_no_audio_path = await self.video_renderer.render_interval_video(
251
+ 0.5,
252
  music_duration
253
  )
254
  else:
 
326
  await self._download_to_local(
327
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
328
  )
329
+ # if os.getenv("INFLOXA", "false").lower() == "true":
330
+ # output_path = "/tmp/repeated_bg_music.mp3"
331
+ # output_path = utils.repeat_audio_ffmpeg(self.data_holder.visual_assets["background_music_local"], output_path, 5)
332
+ # self.data_holder.visual_assets["background_music_local"] = output_path
333
 
334
  async def create_audio(self):
335
  try_again = False
 
382
  from video_downloader import VideoDownloader
383
  download_path="testData/infloxa"
384
 
385
+ allowed_videos = []
386
+
387
  self.data_holder.visual_assets["all_videos"] = [
388
  {
389
  "url": row.get("video_filename", "").strip(),
 
393
  )
394
  }
395
  for _, row in self.asset_selector.video_library.iterrows()
396
+ if row.get("video_filename", "").strip() in allowed_videos
397
  ]
398
  else:
399
  self.data_holder.visual_assets["all_videos"] = [
src/video_downloader.py CHANGED
@@ -5,9 +5,13 @@ Download videos by filename from Google Drive
5
 
6
  import os
7
  import csv
 
8
  import sys
 
9
  from typing import Optional, List, Dict
10
  import shutil
 
 
11
 
12
  # Try to import logger from utils, fallback to print
13
  try:
@@ -44,6 +48,7 @@ class VideoDownloader:
44
  self.config_path = config_path
45
  self.video_library = self._load_video_library()
46
  self.drive_downloader = None
 
47
 
48
  def _load_video_library(self) -> List[Dict]:
49
  """Load video library from CSV file"""
@@ -84,20 +89,338 @@ class VideoDownloader:
84
  raise FileNotFoundError(f"Config file not found: {self.config_path}")
85
 
86
  with open(self.config_path, 'r') as f:
87
- config = yaml.safe_load(f)
88
 
89
  # Override the local directory with our download path
90
- config['output']['local_video_dir'] = download_path
91
 
92
  # Initialize and authenticate
93
  logger.info("Initializing Google Drive connection...")
94
- self.drive_downloader = DriveDownloader(config)
95
  self.drive_downloader.authenticate()
96
  logger.info("✓ Google Drive authenticated")
97
 
98
  except Exception as e:
99
  logger.error(f"Failed to initialize DriveDownloader: {e}")
100
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  def get_folder_name(
103
  self,
@@ -124,6 +447,22 @@ class VideoDownloader:
124
  return matching_video["path"].split("/")[0]
125
  except: return None
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def download_video(
128
  self,
129
  video_filename: str,
@@ -237,6 +576,7 @@ class VideoDownloader:
237
 
238
  return results
239
 
 
240
  def add_folder_name_column(input_csv, output_csv, downloader):
241
  processed = set()
242
 
@@ -273,18 +613,112 @@ def add_folder_name_column(input_csv, output_csv, downloader):
273
  writer.writerow([folder_name] + list(row.values()))
274
  outfile.flush()
275
 
276
- try:
277
- from dotenv import load_dotenv
278
- load_dotenv()
279
-
280
- csv_path = "video_analyser/infloxa_video_analysis.csv"
281
- video_filename = "Copy of Sunset View Balcony.MOV"
282
- downloader = VideoDownloader(csv_path)
283
- add_folder_name_column(
284
- input_csv=csv_path,
285
- output_csv="video_analyser/infloxa_video_analysis_with_folders.csv",
286
- downloader=downloader
287
- )
288
-
289
- except KeyboardInterrupt:
290
- print("\nStopped by Ctrl+C")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  import os
7
  import csv
8
+ from pathlib import Path
9
  import sys
10
+ import re
11
  from typing import Optional, List, Dict
12
  import shutil
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ import threading
15
 
16
  # Try to import logger from utils, fallback to print
17
  try:
 
48
  self.config_path = config_path
49
  self.video_library = self._load_video_library()
50
  self.drive_downloader = None
51
+ self.lock = threading.Lock() # Thread-safe lock for drive operations
52
 
53
  def _load_video_library(self) -> List[Dict]:
54
  """Load video library from CSV file"""
 
89
  raise FileNotFoundError(f"Config file not found: {self.config_path}")
90
 
91
  with open(self.config_path, 'r') as f:
92
+ self.config = yaml.safe_load(f)
93
 
94
  # Override the local directory with our download path
95
+ self.config['output']['local_video_dir'] = download_path
96
 
97
  # Initialize and authenticate
98
  logger.info("Initializing Google Drive connection...")
99
+ self.drive_downloader = DriveDownloader(self.config)
100
  self.drive_downloader.authenticate()
101
  logger.info("✓ Google Drive authenticated")
102
 
103
  except Exception as e:
104
  logger.error(f"Failed to initialize DriveDownloader: {e}")
105
  raise
106
+
107
def _get_thread_service(self):
    """Return a Google Drive service bound to the calling thread.

    googleapiclient service objects are not safe to share across threads,
    so each worker thread gets its own authenticated DriveDownloader
    service, cached in ``self._thread_services`` keyed by thread id.

    Returns:
        A per-thread Google Drive API service instance.
    """
    # FIX: removed the redundant function-local `import threading` —
    # threading is already imported at module level.
    thread_id = threading.get_ident()

    # Lazily create the per-thread cache on first use.
    # NOTE(review): this check-then-set is not guarded by self.lock;
    # concurrent first calls could each authenticate — confirm whether
    # that double-auth is acceptable or should take the lock.
    if not hasattr(self, '_thread_services'):
        self._thread_services = {}

    if thread_id not in self._thread_services:
        # Build a fresh, independently authenticated service for this
        # thread from the same YAML config used by the main downloader.
        from modules import DriveDownloader
        import yaml

        with open(self.config_path, 'r') as f:
            config = yaml.safe_load(f)

        downloader = DriveDownloader(config)
        downloader.authenticate()
        self._thread_services[thread_id] = downloader.service
        logger.info(f"Created new Drive service for thread {thread_id}")

    return self._thread_services[thread_id]
130
+
131
+ def _extract_folder_id_from_link(self, drive_link: str) -> Optional[str]:
132
+ """
133
+ Extract folder ID from Google Drive link
134
+
135
+ Args:
136
+ drive_link: Google Drive folder URL
137
+
138
+ Returns:
139
+ Folder ID or None if not found
140
+ """
141
+ patterns = [
142
+ r'folders/([a-zA-Z0-9_-]+)',
143
+ r'id=([a-zA-Z0-9_-]+)',
144
+ ]
145
+
146
+ for pattern in patterns:
147
+ match = re.search(pattern, drive_link)
148
+ if match:
149
+ return match.group(1)
150
+
151
+ logger.error(f"Could not extract folder ID from link: {drive_link}")
152
+ return None
153
+
154
+ def _list_folder_contents_recursive(self, folder_id: str, parent_path: str = "") -> List[Dict]:
155
+ """
156
+ Recursively list all files in a folder and its subfolders
157
+
158
+ Args:
159
+ folder_id: Google Drive folder ID
160
+ parent_path: Path of parent folder for tracking structure
161
+
162
+ Returns:
163
+ List of dictionaries with file info including relative path
164
+ """
165
+ try:
166
+ files_and_folders = []
167
+
168
+ # Query for all items in this folder
169
+ query = f"'{folder_id}' in parents and trashed=false"
170
+ results = self.drive_downloader.service.files().list(
171
+ q=query,
172
+ fields="files(id, name, mimeType, webViewLink)",
173
+ pageSize=1000
174
+ ).execute()
175
+
176
+ items = results.get('files', [])
177
+
178
+ for item in items:
179
+ item_name = item['name']
180
+ item_id = item['id']
181
+ mime_type = item['mimeType']
182
+
183
+ # Build current path
184
+ current_path = os.path.join(parent_path, item_name) if parent_path else item_name
185
+
186
+ if mime_type == 'application/vnd.google-apps.folder':
187
+ # It's a folder - recurse into it
188
+ logger.info(f"Scanning folder: {current_path}")
189
+ subfolder_contents = self._list_folder_contents_recursive(item_id, current_path)
190
+ files_and_folders.extend(subfolder_contents)
191
+ else:
192
+ # It's a file
193
+ files_and_folders.append({
194
+ 'id': item_id,
195
+ 'name': item_name,
196
+ 'path': current_path,
197
+ 'mimeType': mime_type,
198
+ 'webViewLink': item.get('webViewLink', '')
199
+ })
200
+
201
+ return files_and_folders
202
+
203
+ except Exception as e:
204
+ logger.error(f"Error listing folder contents: {e}")
205
+ return []
206
+
207
def _download_single_file(
    self,
    file_info: Dict,
    download_root: str,
    idx: int,
    total: int
) -> Dict[str, any]:
    """
    Download a single file from Google Drive (for parallel execution).

    Runs inside a ThreadPoolExecutor worker, so all Drive access goes
    through the thread-local service from self._get_thread_service().

    Args:
        file_info: Dictionary with file information (expects 'id', 'name',
            'path' keys, as produced by _list_folder_contents_recursive)
        download_root: Root directory for downloads
        idx: Current file index (1-based, for progress logging)
        total: Total number of files

    Returns:
        Dictionary with download result:
        {'file': str, 'status': 'downloaded'|'skipped'|'failed', 'path': str|None, 'error': str|None}
    """
    result = {
        'file': file_info['name'],
        'status': 'unknown',
        'path': None,
        'error': None
    }

    try:
        # Build local path preserving folder structure
        relative_path = file_info['path']
        local_path = os.path.join(download_root, relative_path)
        local_dir = os.path.dirname(local_path)

        # Check if file already exists — existence only; a partial file
        # from a previous crash would also be skipped (see cleanup below,
        # which removes partials on failure to keep this check safe).
        if os.path.exists(local_path):
            logger.info(f"[{idx}/{total}] Skipped (exists): {relative_path}")
            result['status'] = 'skipped'
            result['path'] = local_path
            return result

        # Create directory structure BEFORE downloading
        os.makedirs(local_dir, exist_ok=True)
        logger.info(f"[{idx}/{total}] Downloading: {relative_path}")

        # Get thread-local service instance (Drive services aren't shared
        # across threads)
        service = self._get_thread_service()

        # Download file DIRECTLY to the final destination
        request = service.files().get_media(fileId=file_info['id'])

        with open(local_path, 'wb') as f:
            from googleapiclient.http import MediaIoBaseDownload
            downloader = MediaIoBaseDownload(f, request)
            done = False
            last_progress = 0
            # Chunked download loop; next_chunk() reports progress per chunk.
            while not done:
                status, done = downloader.next_chunk()
                if status:
                    progress = int(status.progress() * 100)
                    # Log every 25% to avoid spam
                    if progress >= last_progress + 25:
                        logger.info(f"  [{file_info['name']}] Progress: {progress}%")
                        last_progress = progress

        logger.info(f"✓ Successfully downloaded: {local_path}")
        result['status'] = 'downloaded'
        result['path'] = local_path

    except Exception as e:
        logger.error(f"Failed to download {file_info['name']}: {e}")
        result['status'] = 'failed'
        result['error'] = str(e)

        # Clean up partial download if it exists, so the next run's
        # exists-check doesn't skip a truncated file.
        if 'local_path' in locals() and os.path.exists(local_path):
            try:
                os.remove(local_path)
            except:
                pass

    return result
287
+
288
def download_from_drive_link(
    self,
    drive_link: str,
    download_root: str,
    file_extensions: Optional[List[str]] = None,
    max_workers: int = 10  # Number of parallel downloads
) -> Dict[str, any]:
    """
    Download all files from a Google Drive folder link, preserving folder structure
    (with parallel downloads)

    Args:
        drive_link: Google Drive folder URL
            e.g., https://drive.google.com/drive/folders/1WSrVAyqvPJzpRnoUxkNx0LqK9VlDs432
        download_root: Root directory where files should be downloaded
        file_extensions: Optional list of file extensions to filter (e.g., ['.mp4', '.avi'])
            If None, downloads all files
        max_workers: Number of parallel downloads (default: 10)

    Returns:
        Dictionary with download statistics:
        {
            'total_files': int,
            'downloaded': int,
            'skipped': int,
            'failed': int,
            'files': List[str]  # paths of downloaded files
        }

    Example:
        >>> downloader = VideoDownloader()
        >>> result = downloader.download_from_drive_link(
        ...     drive_link="https://drive.google.com/drive/folders/1WSrVAyqvPJzpRnoUxkNx0LqK9VlDs432",
        ...     download_root="downloads/my_videos",
        ...     file_extensions=['.mp4', '.mov', '.avi'],
        ...     max_workers=10
        ... )
        >>> print(f"Downloaded {result['downloaded']} files")
    """
    try:
        # Initialize Drive downloader (pass None to avoid auto path setup)
        self._init_drive_downloader(None)

        # Extract folder ID from link; bail out with empty stats if the
        # link doesn't match any known Drive URL shape.
        folder_id = self._extract_folder_id_from_link(drive_link)
        if not folder_id:
            return {
                'total_files': 0,
                'downloaded': 0,
                'skipped': 0,
                'failed': 0,
                'files': []
            }

        logger.info(f"Scanning Google Drive folder: {folder_id}")

        # Get all files recursively
        all_files = self._list_folder_contents_recursive(folder_id)

        # Filter by file extensions if provided; extensions are
        # normalized to lowercase dotted form ('.mp4') first.
        if file_extensions:
            file_extensions = [ext.lower() if ext.startswith('.') else f'.{ext.lower()}'
                               for ext in file_extensions]
            all_files = [f for f in all_files
                         if any(f['name'].lower().endswith(ext) for ext in file_extensions)]

        logger.info(f"Found {len(all_files)} files to download")

        # Statistics
        stats = {
            'total_files': len(all_files),
            'downloaded': 0,
            'skipped': 0,
            'failed': 0,
            'files': []
        }

        # Download files in parallel using ThreadPoolExecutor
        logger.info(f"Starting parallel downloads with {max_workers} workers...")

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all download tasks; idx is 1-based for log messages.
            future_to_file = {
                executor.submit(
                    self._download_single_file,
                    file_info,
                    download_root,
                    idx,
                    len(all_files)
                ): file_info
                for idx, file_info in enumerate(all_files, 1)
            }

            # Collect results as they complete (completion order, not
            # submission order).
            for future in as_completed(future_to_file):
                file_info = future_to_file[future]
                try:
                    result = future.result()

                    if result['status'] == 'downloaded':
                        stats['downloaded'] += 1
                        if result['path']:
                            stats['files'].append(result['path'])
                    elif result['status'] == 'skipped':
                        stats['skipped'] += 1
                        if result['path']:
                            stats['files'].append(result['path'])
                    elif result['status'] == 'failed':
                        stats['failed'] += 1

                except Exception as e:
                    logger.error(f"Error processing {file_info['name']}: {e}")
                    stats['failed'] += 1

        # Summary
        logger.info("=" * 60)
        logger.info("Download Summary:")
        logger.info(f"  Total files: {stats['total_files']}")
        logger.info(f"  Downloaded: {stats['downloaded']}")
        logger.info(f"  Skipped (already exist): {stats['skipped']}")
        logger.info(f"  Failed: {stats['failed']}")
        logger.info("=" * 60)

        return stats

    except Exception as e:
        logger.error(f"Error downloading from drive link: {e}")
        import traceback
        traceback.print_exc()
        return {
            'total_files': 0,
            'downloaded': 0,
            'skipped': 0,
            'failed': 0,
            'files': []
        }
424
 
425
  def get_folder_name(
426
  self,
 
447
  return matching_video["path"].split("/")[0]
448
  except: return None
449
 
450
+
451
+
452
def get_video_link(self, video_filename: str) -> Optional[str]:
    """Fetches the Google Drive webViewLink for the file.

    Scans the full Drive listing for an exact name match; falls back to
    building a view URL from the file ID when no webViewLink is present.
    Returns None when the file is not found or an error occurs.
    """
    try:
        self._init_drive_downloader(None)
        # Linear scan for the first entry whose name matches exactly.
        match = next(
            (entry for entry in self.drive_downloader.list_all_videos()
             if entry['name'] == video_filename),
            None,
        )
        if match is None:
            return None
        # Return the webLink if available, otherwise construct one from ID
        return match.get('webViewLink') or f"https://drive.google.com/file/d/{match.get('id')}/view"
    except Exception as e:
        logger.error(f"Error fetching link for {video_filename}: {e}")
        return None
465
+
466
  def download_video(
467
  self,
468
  video_filename: str,
 
576
 
577
  return results
578
 
579
+
580
  def add_folder_name_column(input_csv, output_csv, downloader):
581
  processed = set()
582
 
 
613
  writer.writerow([folder_name] + list(row.values()))
614
  outfile.flush()
615
 
616
+
617
def add_link_column(input_csv, output_csv, downloader):
    """Reads input_csv and writes to output_csv with an added 'video_link' column.

    Resumable: filenames already present in output_csv are skipped, and
    the output is flushed after every row so progress survives Ctrl+C.

    Args:
        input_csv: Path to a CSV with a 'video_filename' column.
        output_csv: Path to the CSV to append to (created if missing).
        downloader: Object exposing get_video_link(filename) -> str|None.
    """
    processed = set()

    # 1. Load already processed filenames to allow resuming
    if os.path.exists(output_csv):
        with open(output_csv, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            if reader.fieldnames and "video_filename" in reader.fieldnames:
                for row in reader:
                    processed.add(row["video_filename"].split("/")[-1])

    # 2. Process the files
    with open(input_csv, newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames

        # FIX: fail fast on an empty/headerless input instead of crashing
        # with TypeError (fieldnames=None) or KeyError on every row.
        if not fieldnames or "video_filename" not in fieldnames:
            logger.error(f"Input CSV {input_csv} has no 'video_filename' column")
            return

        # Determine if we need to write the header
        file_exists = os.path.exists(output_csv) and os.path.getsize(output_csv) > 0

        with open(output_csv, "a", newline="", encoding="utf-8") as outfile:
            # We want 'video_link' to be the first column
            writer = csv.DictWriter(outfile, fieldnames=["video_link"] + fieldnames)

            if not file_exists:
                writer.writeheader()

            for row in reader:
                # FIX: tolerate blank/malformed rows instead of raising.
                raw_name = (row.get("video_filename") or "").strip()
                if not raw_name:
                    continue
                video_filename = raw_name.split("/")[-1]

                if video_filename in processed:
                    continue

                logger.info(f"Fetching link for: {video_filename}")
                video_link = downloader.get_video_link(video_filename)

                if not video_link:
                    logger.warning(f"Could not find link for {video_filename}")
                    continue

                # Prepare new row
                new_row = {"video_link": video_link}
                new_row.update(row)

                writer.writerow(new_row)
                outfile.flush()  # Ensure it saves frequently
                processed.add(video_filename)
+
665
+
666
# Example usage
if __name__ == "__main__":
    try:
        from dotenv import load_dotenv

        load_dotenv()

        # Build the downloader, point it at the local infloxa test folder,
        # and enrich the videos CSV with Drive share links.
        downloader = VideoDownloader()
        downloader._init_drive_downloader(download_path="testData/infloxa")
        add_link_column("testData/infloxa_copy/videos.csv", "testData/infloxa_copy/videos_with_links.csv", downloader)

        # One-off jobs kept for reference (run manually as needed):
        #  * bulk-fetch the Infloxa Drive folder via
        #    downloader.download_from_drive_link(drive_link=..., download_root="testData/infloxa",
        #    file_extensions=['.mp4', '.mov', '.avi', '.mkv'])
        #  * rebuild videos.csv by scanning the local
        #    testData/infloxa_copy/Infloxa_* category folders for
        #    {.mp4, .mov, .mkv, .avi, .webm} files and writing a
        #    single 'video_filename' column.

    except KeyboardInterrupt:
        print("\nStopped by Ctrl+C")
video_analyser/get_refresh_token.py CHANGED
@@ -1,62 +1,21 @@
1
- """
2
- Helper script to get a Google Drive refresh token with the correct scopes.
3
- Run this once to generate a new refresh token for video_analyser.
4
- """
5
-
6
- import os
7
  from google_auth_oauthlib.flow import InstalledAppFlow
8
- from dotenv import load_dotenv
9
-
10
- # Load existing credentials
11
- load_dotenv()
12
 
13
- # Scopes needed for video analyser
14
  SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
15
 
16
- def get_refresh_token():
17
- """Get a new refresh token with Drive readonly scope"""
18
-
19
- client_id = os.getenv('SERVER_GOOGLE_CLIENT_ID')
20
- client_secret = os.getenv('SERVER_GOOGLE_CLIENT_SECRET')
21
-
22
- if not client_id or not client_secret:
23
- print("ERROR: Missing SERVER_GOOGLE_CLIENT_ID or SERVER_GOOGLE_CLIENT_SECRET in .env")
24
- return
25
-
26
- # Create OAuth flow with manual client config
27
- client_config = {
28
- "installed": {
29
- "client_id": client_id,
30
- "client_secret": client_secret,
31
- "auth_uri": "https://accounts.google.com/o/oauth2/auth",
32
- "token_uri": "https://oauth2.googleapis.com/token",
33
- "redirect_uris": ["http://localhost"]
34
- }
35
- }
36
-
37
- flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
38
-
39
- print("\n" + "="*60)
40
- print("Google Drive Refresh Token Generator")
41
- print("="*60)
42
- print(f"\nRequesting scopes: {', '.join(SCOPES)}")
43
- print("\nA browser window will open for authorization...")
44
- print("Sign in and grant access to Google Drive (read-only)")
45
- print("="*60 + "\n")
46
-
47
- # Run OAuth flow
48
- creds = flow.run_local_server(port=8080)
49
-
50
- print("\n" + "="*60)
51
- print("✅ Authorization successful!")
52
- print("="*60)
53
- print("\nAdd this to your .env file:")
54
- print("-"*60)
55
- print(f"DRIVE_GOOGLE_REFRESH_TOKEN={creds.refresh_token}")
56
- print("-"*60)
57
- print("\nThen update video_analyser to use DRIVE_GOOGLE_REFRESH_TOKEN")
58
- print("instead of SERVER_GOOGLE_REFRESH_TOKEN for Drive operations.")
59
- print("="*60 + "\n")
60
 
61
- if __name__ == '__main__':
62
- get_refresh_token()
 
 
 
 
 
 
 
1
  from google_auth_oauthlib.flow import InstalledAppFlow
 
 
 
 
2
 
3
+ # Google Drive read-only scope
4
  SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
5
 
6
def main():
    """Run the installed-app OAuth flow and print a Drive refresh token.

    Opens a local browser window for user consent using the client-secret
    JSON file, then prints the resulting refresh token so it can be copied
    into the environment configuration.
    """
    print("🔑 Starting OAuth flow...")

    # Client secrets file downloaded from the Google Cloud console.
    flow = InstalledAppFlow.from_client_secrets_file(
        "whoa/client_secret_688373610660-vtr5l8q7s4is9kkvd7hla1cqg273emfs.apps.googleusercontent.com.json",
        SCOPES
    )

    # port=0 lets the OS pick a free loopback port for the redirect URI.
    creds = flow.run_local_server(port=0)

    print("\n✅ AUTH SUCCESS")
    print("REFRESH TOKEN:\n")
    print(creds.refresh_token)


if __name__ == "__main__":
    main()
video_analyser/modules/drive_downloader.py CHANGED
@@ -12,7 +12,6 @@ from tqdm import tqdm
12
 
13
  from google.auth.transport.requests import Request
14
  from google.oauth2.credentials import Credentials
15
- from google_auth_oauthlib.flow import InstalledAppFlow
16
  from googleapiclient.discovery import build
17
  from googleapiclient.http import MediaIoBaseDownload
18
 
 
12
 
13
  from google.auth.transport.requests import Request
14
  from google.oauth2.credentials import Credentials
 
15
  from googleapiclient.discovery import build
16
  from googleapiclient.http import MediaIoBaseDownload
17