Spaces:
Sleeping
Sleeping
added dspy, to allow .mkv files, upload multiple slides and notebooks, remove base name matching in mentor materials
a4af32a
| # src/preprocessing/download_manager.py | |
| import os | |
| import logging | |
| from .gdrive_manager import GoogleDriveManager | |
| import json | |
| from google.oauth2 import service_account | |
| logger = logging.getLogger(__name__) | |
class GoogleDriveDownloader:
    """Download videos from, and upload artifacts to, Google Drive folders.

    All Drive traffic is delegated to the ``GoogleDriveManager`` instance held
    in ``self.gdrive``; this class adds local-path handling and folder-key
    lookup on top of it.
    """

    # OAuth scope requested when building service-account credentials.
    SCOPES = ['https://www.googleapis.com/auth/drive']

    # MIME types treated as videos: MP4 and Matroska (.mkv).
    VIDEO_MIME_TYPES = ('video/mp4', 'video/x-matroska')

    def __init__(self, download_path: str, drive_folders: dict):
        """Prepare the local download directory and the Drive client.

        Args:
            download_path: Local directory that downloaded files are written
                to. It is created if it does not exist yet.
            drive_folders: Mapping from a folder key (VIDEOS, AUDIOS,
                TRANSCRIPTS, REPORTS, MENTOR_MATERIALS) to the value that is
                handed to the Drive manager as the upload folder id.

        Raises:
            json.JSONDecodeError: If the ``GCP_CREDENTIALS`` environment
                variable is set but does not contain valid JSON.
        """
        self.download_path = download_path
        os.makedirs(download_path, exist_ok=True)
        self.gdrive = GoogleDriveManager()
        self.drive_folders = drive_folders

        # Always define the attribute so that reading ``self.creds`` never
        # raises AttributeError when GCP_CREDENTIALS is not configured.
        self.creds = None
        gcp_credentials = os.getenv("GCP_CREDENTIALS")
        if gcp_credentials:
            cred_data = json.loads(gcp_credentials)
            self.creds = service_account.Credentials.from_service_account_info(
                cred_data, scopes=self.SCOPES
            )

    @staticmethod
    def _safe_local_name(name: str, fallback: str) -> str:
        """Return ``name`` flattened into a single local path component.

        Drive file names are free-form text and may contain path separators.
        Joining such a name onto the download directory would point into a
        missing sub-directory or outside the download directory, so every
        separator is replaced with ``_``. Names that would still not denote a
        regular file (empty, ``.`` or ``..``) are replaced by ``fallback``.
        """
        cleaned = name
        for separator in (os.sep, os.altsep):
            if separator:  # os.altsep is None on POSIX
                cleaned = cleaned.replace(separator, "_")
        if cleaned in ("", ".", ".."):
            return fallback
        return cleaned

    def process_one_video(self, videos_folder_url: str):
        """Download the first video listed in a Drive folder.

        Args:
            videos_folder_url: URL of the Drive folder that holds the videos.

        Returns:
            ``None`` when the folder lists no videos. Otherwise a dict with
            the Drive ``id``, the original Drive ``name`` and the local
            ``path`` the file was downloaded to.
        """
        video_files = self.list_all_videos(videos_folder_url)
        if not video_files:
            logger.info("No videos found in Drive folder.")
            return None

        # Only the first listed video is handled per call.
        video = video_files[0]
        local_name = self._safe_local_name(video['name'], video['id'])
        local_video_path = os.path.join(self.download_path, local_name)
        self.gdrive.download_file(video['id'], local_video_path)
        logger.info("Downloaded: %s", video['name'])
        return {
            'id': video['id'],
            'name': video['name'],
            'path': local_video_path,
        }

    def delete_drive_file(self, file_id):
        """Ask the Drive manager to delete the file identified by ``file_id``."""
        self.gdrive.delete_file(file_id)

    def upload_to_drive(self, local_path, folder_key, mime_type):
        """Upload a local file to the Drive folder registered under a key.

        Args:
            local_path: Path of the local file to upload.
            folder_key: Key into the ``drive_folders`` mapping given to the
                constructor.
            mime_type: MIME type passed through to the Drive manager.

        Returns:
            Whatever the Drive manager's ``upload_file`` returns.

        Raises:
            KeyError: If ``folder_key`` is not present in ``drive_folders``.
        """
        folder_id = self.drive_folders[folder_key]
        return self.gdrive.upload_file(local_path, folder_id, mime_type)

    def list_all_videos(self, videos_folder_url: str):
        """List every MP4 and MKV file in the given Drive folder.

        Args:
            videos_folder_url: URL of the Drive folder that holds the videos.

        Returns:
            Whatever the Drive manager's ``list_files`` returns for the
            folder, restricted to ``VIDEO_MIME_TYPES``.
        """
        videos_folder_id = self.gdrive.get_folder_id(videos_folder_url)
        # A fresh list is passed so the manager receives the same argument
        # type as before and cannot mutate the class-level constant.
        return self.gdrive.list_files(videos_folder_id, list(self.VIDEO_MIME_TYPES))