Spaces:

Elvoro
/

Tools

Running

App Files Files Community

Tools / src /google_src /drive_utils.py

jebin2

refactor: Centralize logger import to src.logger_config across various modules.

f20025d 3 days ago

raw

history blame contribute delete

11.7 kB

	"""
	Google Drive utilities for centralized upload operations.
	Uses test_data credentials for Drive uploads as fallback when GCS fails.
	"""
	import os
	import uuid
	from pathlib import Path
	from typing import Optional
	from src.logger_config import logger
	from .gcloud_wrapper import get_default_wrapper
	from src.config import get_config_value


	# Extension -> MIME type lookup used when sending media to Drive.
	# Keys are lower-case file suffixes including the leading dot. Each entry
	# below pairs one MIME type with every extension that maps to it, so aliases
	# (".jpg"/".jpeg", ".txt"/".srt") are declared once.
	MIME_TYPES = {
	    extension: mime_type
	    for mime_type, extensions in (
	        # Video
	        ("video/mp4", (".mp4",)),
	        ("video/quicktime", (".mov",)),
	        ("video/x-msvideo", (".avi",)),
	        ("video/x-matroska", (".mkv",)),
	        # Audio
	        ("audio/mpeg", (".mp3",)),
	        ("audio/wav", (".wav",)),
	        ("audio/aac", (".aac",)),
	        ("audio/mp4", (".m4a",)),
	        # Images
	        ("image/png", (".png",)),
	        ("image/jpeg", (".jpg", ".jpeg")),
	        ("image/gif", (".gif",)),
	        ("image/webp", (".webp",)),
	        # Structured data and plain text (subtitles are sent as plain text)
	        ("application/json", (".json",)),
	        ("text/plain", (".txt", ".srt")),
	    )
	    for extension in extensions
	}


	def get_drive_service(account_name: str = "test_data"):
	    """
	    Build a Google Drive v3 API client for one account.

	    Args:
	        account_name: Name passed to the default gcloud wrapper to look up
	            credentials (default: test_data).

	    Returns:
	        The object returned by googleapiclient's build("drive", "v3", ...).
	    """
	    credentials = get_default_wrapper()._get_credentials(account_name)

	    # Function-scope import: the Google client library is only imported
	    # when a Drive service is actually requested.
	    from googleapiclient.discovery import build

	    return build("drive", "v3", credentials=credentials)


	def _escape_drive_query_value(value: str) -> str:
	    """Escape a string for use inside a single-quoted Drive query literal."""
	    # Backslashes are doubled first so the backslash added in front of each
	    # quote is not doubled again, and so a backslash at the end of the name
	    # cannot escape the literal's closing quote.
	    return value.replace("\\", "\\\\").replace("'", "\\'")


	def _list_drive_files(service, query: str) -> list:
	    """Run one files().list request (all drives included) and return its "files" list."""
	    response = service.files().list(
	        q=query,
	        fields="files(id, name, webViewLink, parents)",
	        pageSize=10,
	        supportsAllDrives=True,
	        includeItemsFromAllDrives=True,
	    ).execute()
	    return response.get("files", [])


	def search_file_by_name(
	    filename: str,
	    folder_id: Optional[str] = None,
	    account_name: str = "test_data",
	) -> Optional[dict]:
	    """
	    Search for a non-trashed file by name in Google Drive.

	    An exact-name query is tried first. If it returns nothing, a
	    "name contains" query is tried with any leading "Copy of " removed from
	    the name. Each query requests at most 10 candidates; among them, a file
	    whose name equals ``filename`` is preferred, otherwise the first
	    candidate is used.

	    Note: folder_id is currently not used; the search runs across everything
	    the account can see, including shared drives.

	    Args:
	        filename: The filename to search for (exact match first, then partial).
	        folder_id: Currently unused (kept for API compatibility).
	        account_name: Account to use (default: test_data).

	    Returns:
	        dict with file info if found, None otherwise (also None when the
	        name is empty or when any error occurs; errors are logged):
	        {
	            "file_id": str,
	            "name": str,
	            "url": str (viewable link),
	            "download_url": str
	        }
	    """
	    if not filename:
	        # An empty name cannot identify a file; skip the API round-trips.
	        logger.warning("⚠️ Cannot search Drive: empty filename")
	        return None

	    try:
	        service = get_drive_service(account_name)

	        logger.debug(f"🔍 Searching Drive for: {filename}")
	        exact_query = f"name = '{_escape_drive_query_value(filename)}' and trashed = false"
	        files = _list_drive_files(service, exact_query)

	        if not files:
	            # Fall back to a partial match on the core filename, dropping the
	            # "Copy of " prefix that Drive adds to duplicated files.
	            copy_prefix = "Copy of "
	            clean_name = filename
	            if clean_name.startswith(copy_prefix):
	                clean_name = clean_name[len(copy_prefix):]

	            # If nothing is left after stripping the prefix there is no
	            # meaningful substring to search for.
	            if clean_name:
	                logger.debug(f"🔍 Trying partial match: {clean_name}")
	                partial_query = (
	                    f"name contains '{_escape_drive_query_value(clean_name)}' and trashed = false"
	                )
	                files = _list_drive_files(service, partial_query)

	        if not files:
	            logger.warning(f"⚠️ File not found in Drive: {filename}")
	            return None

	        # Prefer an exact name match among the candidates, else take the first.
	        chosen = next((f for f in files if f.get("name") == filename), files[0])

	        file_id = chosen.get("id")
	        result = {
	            "file_id": file_id,
	            "name": chosen.get("name"),
	            "url": chosen.get("webViewLink", f"https://drive.google.com/file/d/{file_id}/view"),
	            "download_url": f"https://drive.google.com/uc?export=download&id={file_id}",
	        }
	        logger.debug(f"✅ Found file: {result['name']} -> {result['url']}")
	        return result

	    except Exception as e:
	        # Search is best-effort: callers get None instead of an exception.
	        logger.error(f"❌ Error searching Drive: {e}")
	        return None


	def upload_file_to_drive(
	    local_path: str,
	    folder_id: Optional[str] = None,
	    filename: Optional[str] = None,
	    account_name: str = "test_data",
	) -> dict:
	    """
	    Create a new Google Drive file from a local file and try to share it by link.

	    Args:
	        local_path: Path of the local file to send.
	        folder_id: Drive folder to place the file in. When falsy, the
	            DRIVE_UPLOAD_FOLDER_ID config value is used; if that is falsy
	            too, no parent folder is set on the new file.
	        filename: Name to give the file in Drive. When falsy, the local file
	            name prefixed with 8 random hex characters is used.
	        account_name: Account to use (default: test_data).

	    Returns:
	        dict: {
	            "file_id": str,
	            "filename": str (name used in Drive),
	            "gcs_filename": str ("drive/" + filename),
	            "url": str (viewable link),
	            "public_url": str (same value as "url"),
	            "download_url": str (direct download link)
	        }

	    Raises:
	        Exception: Any error from the upload itself is logged and re-raised.
	            A failure to make the file public is only logged as a warning.
	    """
	    from googleapiclient.http import MediaFileUpload

	    try:
	        drive = get_drive_service(account_name)

	        # An explicit folder wins; otherwise fall back to the configured one.
	        parent_id = folder_id or get_config_value("DRIVE_UPLOAD_FOLDER_ID")

	        source = Path(local_path)
	        extension = source.suffix.lower()

	        if filename:
	            drive_name = filename
	        else:
	            # Random prefix keeps repeated uploads of the same file distinct.
	            drive_name = f"{uuid.uuid4().hex[:8]}_{source.name}"

	        # Unknown extensions are sent as generic binary data.
	        content_type = MIME_TYPES.get(extension, "application/octet-stream")

	        metadata = {"name": drive_name}
	        if parent_id:
	            metadata["parents"] = [parent_id]

	        uploader = MediaFileUpload(local_path, mimetype=content_type, resumable=True)

	        logger.debug(f"📤 Uploading {source.name} to Google Drive...")

	        # supportsAllDrives=True enables Shared Drive uploads.
	        created = drive.files().create(
	            body=metadata,
	            media_body=uploader,
	            fields="id, name, webViewLink, webContentLink",
	            supportsAllDrives=True,
	        ).execute()
	        new_id = created.get("id")

	        # Best effort: grant "anyone with the link" read access. The upload
	        # still counts as successful if this step fails.
	        try:
	            drive.permissions().create(
	                fileId=new_id,
	                body={"type": "anyone", "role": "reader"},
	                supportsAllDrives=True,
	            ).execute()
	            logger.debug("✅ File shared publicly")
	        except Exception as e:
	            logger.warning(f"⚠️ Could not make file public: {e}")

	        view_url = created.get("webViewLink", f"https://drive.google.com/file/d/{new_id}/view")

	        result = {
	            "file_id": new_id,
	            "filename": drive_name,
	            # Kept for compatibility with the GCS result format.
	            "gcs_filename": f"drive/{drive_name}",
	            "url": view_url,
	            "public_url": view_url,
	            "download_url": f"https://drive.google.com/uc?export=download&id={new_id}",
	        }

	        logger.debug(f"✅ Uploaded to Drive: {view_url}")
	        return result

	    except Exception as e:
	        logger.error(f"❌ Failed to upload to Drive: {e}")
	        raise e


	def extract_drive_file_id(url: str) -> Optional[str]:
	    """
	    Extract the file ID from a Google Drive URL.

	    Supported URL formats:
	    - https://drive.google.com/file/d/FILE_ID/view
	    - https://drive.google.com/open?id=FILE_ID
	    - https://drive.google.com/uc?id=FILE_ID
	    - https://drive.google.com/uc?export=download&id=FILE_ID

	    Args:
	        url: Candidate Drive URL.

	    Returns:
	        The file ID, or None when ``url`` is empty, is not a string, does not
	        contain "drive.google.com", or matches none of the formats above.
	    """
	    import re

	    # Non-string input (e.g. None) is treated the same as "no URL".
	    if not isinstance(url, str) or "drive.google.com" not in url:
	        return None

	    # Checked in order: the path form (/file/d/FILE_ID) takes precedence
	    # over the query form (?id=FILE_ID or &id=FILE_ID).
	    id_patterns = (
	        r"/file/d/([a-zA-Z0-9_-]+)",
	        r"[?&]id=([a-zA-Z0-9_-]+)",
	    )
	    for pattern in id_patterns:
	        match = re.search(pattern, url)
	        if match:
	            return match.group(1)

	    return None


	def delete_file_from_drive(
	    url_or_file_id: str,
	    account_name: str = "test_data",
	) -> bool:
	    """
	    Delete a file from Google Drive.

	    Args:
	        url_or_file_id: Either a Drive URL or direct file ID. Values starting
	            with "http" are treated as URLs and the file ID is extracted.
	        account_name: Account to use (default: test_data).

	    Returns:
	        bool: True if the file was deleted or the API answered HTTP 404
	        (file already gone). False for an empty argument, a URL with no
	        extractable file ID, or any other error; errors are logged, not raised.
	    """
	    from googleapiclient.errors import HttpError

	    if not url_or_file_id:
	        logger.error("❌ No Drive URL or file ID provided for deletion")
	        return False

	    try:
	        service = get_drive_service(account_name)

	        # Extract file ID if URL was provided
	        if url_or_file_id.startswith("http"):
	            file_id = extract_drive_file_id(url_or_file_id)
	            if not file_id:
	                logger.error(f"❌ Could not extract file ID from URL: {url_or_file_id}")
	                return False
	        else:
	            file_id = url_or_file_id

	        logger.debug(f"🗑️ Deleting file from Drive: {file_id}")
	        # supportsAllDrives=True matches the upload/update/search calls in
	        # this module, so files created on Shared Drives can be deleted too.
	        service.files().delete(fileId=file_id, supportsAllDrives=True).execute()
	        logger.debug(f"✅ Deleted from Drive: {file_id}")
	        return True

	    except HttpError as e:
	        # Decide on the HTTP status, not the message text: the error string
	        # includes the request URL, so a file ID containing "404" would make
	        # a substring check misreport other failures as "already deleted".
	        if e.resp.status == 404:
	            logger.debug(f"⚠️ File not found (already deleted?): {url_or_file_id}")
	            return True  # Consider it success if file doesn't exist
	        logger.error(f"❌ Failed to delete from Drive: {e}")
	        return False
	    except Exception as e:
	        logger.error(f"❌ Failed to delete from Drive: {e}")
	        return False


	def update_file_content(
	    file_id: str,
	    local_path: str,
	    account_name: str = "test_data",
	) -> dict:
	    """
	    Replace the content (media) of an existing Google Drive file.

	    Only a new media body is sent; no metadata body is included in the
	    request, so the file keeps its ID and name.

	    Args:
	        file_id: ID of the Drive file whose content is replaced.
	        local_path: Path of the local file providing the new content.
	        account_name: Account to use (default: test_data).

	    Returns:
	        dict: {
	            "file_id": str (the ID passed in),
	            "name": str (name reported by Drive),
	            "url": str (viewable link),
	            "download_url": str (direct download link)
	        }

	    Raises:
	        Exception: Any error during the update is logged and re-raised.
	    """
	    from googleapiclient.http import MediaFileUpload

	    try:
	        drive = get_drive_service(account_name)

	        source = Path(local_path)
	        # Unknown extensions are sent as generic binary data.
	        content_type = MIME_TYPES.get(source.suffix.lower(), "application/octet-stream")

	        uploader = MediaFileUpload(local_path, mimetype=content_type, resumable=True)

	        logger.debug(f"🔄 Updating file content on Drive : {file_id} with {source.name}")

	        updated = drive.files().update(
	            fileId=file_id,
	            media_body=uploader,
	            fields="id, name, webViewLink, webContentLink",
	            supportsAllDrives=True,
	        ).execute()

	        # Fall back to the standard view link if Drive did not return one.
	        view_url = updated.get("webViewLink", f"https://drive.google.com/file/d/{file_id}/view")

	        result = {
	            "file_id": file_id,
	            "name": updated.get("name"),
	            "url": view_url,
	            "download_url": f"https://drive.google.com/uc?export=download&id={file_id}",
	        }

	        logger.debug(f"✅ Updated Drive file content: {view_url}")
	        return result

	    except Exception as e:
	        logger.error(f"❌ Failed to update Drive file: {e}")
	        raise e