Recording-QC-Bot / src /preprocessing /download_manager.py
varund2003's picture
added dspy, to allow .mkv files, upload multiple slides and notebooks, remove base name matching in mentor materials
a4af32a
# src/preprocessing/download_manager.py
import os
import logging
from .gdrive_manager import GoogleDriveManager
import json
from google.oauth2 import service_account
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class GoogleDriveDownloader:
    """Download, upload and delete Drive files through ``GoogleDriveManager``.

    All Drive operations are delegated to the ``GoogleDriveManager`` instance
    stored in ``self.gdrive``; this class only adds local-path handling and
    the mapping from logical folder keys to Drive folder IDs.
    """

    # Google Drive OAuth scope requested for the service-account credentials.
    SCOPES = ['https://www.googleapis.com/auth/drive']
    # MIME types treated as videos: mp4 and mkv (Matroska).
    VIDEO_MIME_TYPES = ('video/mp4', 'video/x-matroska')

    def __init__(self, download_path: str, drive_folders: dict):
        """Create the download directory and set up Drive access.

        Args:
            download_path: Local directory that downloaded files are written
                to. It is created if it does not exist yet.
            drive_folders: Mapping from a folder key to the Drive folder ID
                used by ``upload_to_drive``. Expected keys: VIDEOS, AUDIOS,
                TRANSCRIPTS, REPORTS, MENTOR_MATERIALS.

        Raises:
            json.JSONDecodeError: If the ``GCP_CREDENTIALS`` environment
                variable is set but does not contain valid JSON.
        """
        self.download_path = download_path
        os.makedirs(download_path, exist_ok=True)
        self.gdrive = GoogleDriveManager()
        self.drive_folders = drive_folders

        # Always define the attribute so that reading ``self.creds`` never
        # raises AttributeError when GCP_CREDENTIALS is not configured.
        # NOTE(review): nothing in this class passes these credentials on to
        # GoogleDriveManager -- confirm whether they are still needed.
        self.creds = None
        gcp_credentials = os.getenv("GCP_CREDENTIALS")
        if gcp_credentials:
            cred_data = json.loads(gcp_credentials)
            self.creds = service_account.Credentials.from_service_account_info(
                cred_data, scopes=self.SCOPES
            )

    @staticmethod
    def _safe_local_name(name, fallback):
        """Return ``name`` as a single path component safe to join locally.

        Path separators are replaced with ``_`` so a remote file name cannot
        point outside (or into a missing subdirectory of) the download
        directory. If the result is empty, ``.`` or ``..``, ``fallback`` is
        used instead (with the same separator replacement applied).
        """
        def _flatten(text):
            for sep in (os.sep, os.altsep):
                if sep:
                    text = text.replace(sep, "_")
            return text

        cleaned = _flatten(name)
        if cleaned in ("", ".", ".."):
            cleaned = _flatten(fallback)
        return cleaned

    def process_one_video(self, videos_folder_url: str):
        """Download the first video listed in a Drive folder.

        Args:
            videos_folder_url: URL of the Drive folder; it is resolved to a
                folder ID with ``self.gdrive.get_folder_id``.

        Returns:
            ``None`` when the folder lists no mp4/mkv files. Otherwise a dict
            with the Drive ``id``, the original Drive ``name`` and the local
            ``path`` the file was downloaded to.
        """
        videos_folder_id = self.gdrive.get_folder_id(videos_folder_url)
        video_files = self.gdrive.list_files(
            videos_folder_id, list(self.VIDEO_MIME_TYPES)
        )
        if not video_files:
            logger.info("No videos found in Drive folder.")
            return None

        # Only the first listed video is processed per call.
        video = video_files[0]
        # The Drive name is remote data: flatten it to one path component so
        # the download always lands directly inside ``download_path``.
        local_name = self._safe_local_name(video['name'], video['id'])
        local_video_path = os.path.join(self.download_path, local_name)
        self.gdrive.download_file(video['id'], local_video_path)
        logger.info("Downloaded: %s", video['name'])
        return {
            'id': video['id'],
            'name': video['name'],
            'path': local_video_path
        }

    def delete_drive_file(self, file_id):
        """Delete the Drive file with the given ID via ``self.gdrive``."""
        self.gdrive.delete_file(file_id)

    def upload_to_drive(self, local_path, folder_key, mime_type):
        """Upload a local file to the Drive folder registered under a key.

        Args:
            local_path: Path of the local file to upload.
            folder_key: Key into ``self.drive_folders`` selecting the target
                folder ID.
            mime_type: MIME type forwarded to ``self.gdrive.upload_file``.

        Returns:
            Whatever ``self.gdrive.upload_file`` returns.

        Raises:
            KeyError: If ``folder_key`` is not present in ``drive_folders``.
        """
        folder_id = self.drive_folders[folder_key]
        return self.gdrive.upload_file(local_path, folder_id, mime_type)

    def list_all_videos(self, videos_folder_url: str):
        """Return the mp4 and mkv files that Drive lists for a folder URL.

        The result is passed through unchanged from
        ``self.gdrive.list_files``.
        """
        videos_folder_id = self.gdrive.get_folder_id(videos_folder_url)
        # Accept both mp4 and mkv
        return self.gdrive.list_files(
            videos_folder_id, list(self.VIDEO_MIME_TYPES)
        )