Spaces:
Running
Running
| import os | |
| import asyncio | |
| import yt_dlp | |
| from google import genai | |
| from google.genai import types | |
| from typing import Optional, Dict, Any | |
def transcribe_video(video_url: str) -> Optional[str]:
    """Download a video's audio track and have Gemini transcribe/summarize it.

    The audio is fetched with yt-dlp, converted to MP3 by its
    ``FFmpegExtractAudio`` post-processor, and sent inline to the
    ``gemini-2.0-flash-exp`` model through the ``google.genai`` SDK.

    Args:
        video_url: URL of the video to process (anything yt-dlp can resolve).

    Returns:
        The text of the model response, or ``None`` when ``GEMINI_API_KEY``
        is not set, the MP3 was not produced, or any step raised.

    This function never raises: failures are printed and reported as
    ``None`` so callers can treat transcription as best-effort.
    """
    # Function-scope import keeps this block self-contained without
    # touching the module's import section.
    import tempfile

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return None

    try:
        # 1. Download audio into a private scratch directory. A per-call
        #    directory avoids collisions between concurrent requests for the
        #    same video id, and the context manager removes the MP3 plus any
        #    partial/intermediate files on every exit path.
        with tempfile.TemporaryDirectory(prefix="temp_audio_") as workdir:
            # '%' is the template marker in yt-dlp output templates, so a
            # literal '%' in the directory path must be doubled.
            safe_dir = workdir.replace('%', '%%')
            ydl_opts = {
                'format': 'bestaudio/best',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'outtmpl': os.path.join(safe_dir, 'temp_audio_%(id)s.%(ext)s'),
                # For "video inside a playlist" URLs fetch only the video;
                # otherwise the whole playlist is downloaded and the
                # expected single file is never found.
                'noplaylist': True,
                'quiet': True,
                'no_warnings': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(video_url, download=True)

            if not info or not info.get('id'):
                return None

            # The post-processor rewrites the extension to .mp3.
            filename = os.path.join(workdir, f"temp_audio_{info['id']}.mp3")
            if not os.path.exists(filename):
                return None

            with open(filename, 'rb') as f:
                audio_bytes = f.read()

        # 2. Transcribe with Gemini. The bytes are sent inline in the request
        #    (not via the Files API), so the file on disk is no longer needed.
        # NOTE(review): inline payloads are subject to the API's request-size
        # limit; very long recordings may need client.files.upload — confirm.
        client = genai.Client(api_key=api_key)
        audio_part = types.Part.from_bytes(
            data=audio_bytes,
            mime_type="audio/mp3"
        )
        prompt = "Transcribe this audio file and summarize the key points."

        response = client.models.generate_content(
            model='gemini-2.0-flash-exp',
            contents=[prompt, audio_part]
        )
        return response.text

    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"Multimedia Transcription Failed: {e}")
        return None