import asyncio
import os
import tempfile
from typing import Optional, Dict, Any

import yt_dlp
from google import genai
from google.genai import types


def transcribe_video(video_url: str) -> Optional[str]:
    """Download a video's audio track and ask Gemini to transcribe it.

    The audio is fetched with yt-dlp, converted to MP3 by its
    ``FFmpegExtractAudio`` post-processor, and sent inline to the Gemini
    model together with a prompt requesting a transcription and a summary
    of the key points.

    All intermediate files live in a private temporary directory that is
    removed on every exit path, so failed or partial downloads leave
    nothing behind and concurrent calls for the same video cannot clobber
    each other's files.

    Args:
        video_url: URL of the video to process, passed to yt-dlp as-is.

    Returns:
        The text of the model response, or ``None`` when the
        ``GEMINI_API_KEY`` environment variable is unset or empty, when no
        MP3 file was produced, or when any step raised an exception (the
        error is printed, not propagated).
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return None

    try:
        # 1. Download audio into a throwaway directory. The context manager
        #    deletes the directory (and any partial downloads) even when
        #    yt-dlp or ffmpeg fails midway.
        with tempfile.TemporaryDirectory(prefix="temp_audio_") as workdir:
            # '%' is the template metacharacter in yt-dlp output templates,
            # so escape it in the directory part of the path.
            safe_dir = workdir.replace('%', '%%')
            ydl_opts = {
                'format': 'bestaudio/best',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'outtmpl': os.path.join(safe_dir, 'temp_audio_%(id)s.%(ext)s'),
                'quiet': True,
                'no_warnings': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(video_url, download=True)

            if not info:
                return None

            # The post-processor replaces the downloaded extension with .mp3.
            filename = os.path.join(workdir, f"temp_audio_{info['id']}.mp3")
            if not os.path.exists(filename):
                return None

            # Read the bytes before the temporary directory is torn down.
            with open(filename, 'rb') as f:
                audio_bytes = f.read()

        # 2. Transcribe with Gemini using the google.genai SDK.
        client = genai.Client(api_key=api_key)

        # The audio is sent inline with the request (no separate upload).
        # NOTE(review): inline data counts toward the API's request-size
        # limit; long recordings may need the Files API instead - confirm.
        audio_part = types.Part.from_bytes(
            data=audio_bytes,
            mime_type="audio/mp3"
        )

        prompt = "Transcribe this audio file and summarize the key points."

        response = client.models.generate_content(
            model='gemini-2.0-flash-exp',
            contents=[prompt, audio_part]
        )

        return response.text

    except Exception as e:
        # Best-effort boundary: callers get None instead of an exception.
        print(f"Multimedia Transcription Failed: {e}")
        return None