Spaces:
Sleeping
Sleeping
| from typing import Optional | |
| from google import genai | |
| from google.genai import types | |
| import requests | |
| import os | |
# Seconds to wait on the audio download before giving up, so a stalled
# server cannot block the caller indefinitely.
_AUDIO_DOWNLOAD_TIMEOUT_S = 30

# MIME type used when neither the URL nor the HTTP response identifies the
# format. This is the value the function previously sent for every file.
_DEFAULT_AUDIO_MIME_TYPE = "audio/mpeg"

# File extension -> MIME type passed to Gemini.
# NOTE(review): values chosen from commonly documented Gemini audio types;
# confirm against the current Gemini audio documentation.
_AUDIO_MIME_BY_EXTENSION = {
    ".mp3": "audio/mpeg",
    ".mpga": "audio/mpeg",
    ".wav": "audio/wav",
    ".m4a": "audio/mp4",
    ".mp4": "audio/mp4",
    ".aac": "audio/aac",
    ".flac": "audio/flac",
    ".ogg": "audio/ogg",
    ".aif": "audio/aiff",
    ".aiff": "audio/aiff",
}


def _guess_audio_mime_type(audio_url: str, content_type: Optional[str] = None) -> str:
    """Pick the MIME type for an audio URL from its extension or HTTP header."""
    from urllib.parse import urlparse

    # Use only the URL path so query strings ("?token=...") do not hide the
    # file extension.
    extension = os.path.splitext(urlparse(audio_url).path)[1].lower()
    if extension in _AUDIO_MIME_BY_EXTENSION:
        return _AUDIO_MIME_BY_EXTENSION[extension]

    # Fall back to the server-reported type, ignoring parameters such as
    # "; charset=...", but only when it actually names an audio format.
    header_type = (content_type or "").split(";", 1)[0].strip().lower()
    if header_type.startswith("audio/"):
        return header_type

    return _DEFAULT_AUDIO_MIME_TYPE


def analyze_audio(audio_url: str, analysis_prompt: Optional[str] = None) -> str:
    """
    Download an audio file and have a Google Gemini model transcribe or analyze it.

    Args:
        audio_url (str): URL of the audio file to analyze.
        analysis_prompt (Optional[str]): Instruction sent to the model alongside
            the audio. When omitted or empty, a detailed transcription is requested.

    Returns:
        str: The text produced by the model.

        On any failure (invalid URL, download error, HTTP error status, empty
        file, model error) the exception is not raised; instead a dict of the
        form ``{"error": "Error analyzing audio: <details>"}`` is returned.
        That error shape is kept for backward compatibility even though the
        annotation says ``str``.
    """
    try:
        # Reject unusable input before doing any network or API work.
        if not isinstance(audio_url, str) or not audio_url.strip():
            raise ValueError("audio_url must be a non-empty string")

        # Initialize the Google Gen AI client.
        gemini_llm = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
        print(f"Analyzing audio from URL {audio_url}")
        text = analysis_prompt or "Provide a detailed transcription of this audio."

        download = requests.get(audio_url, timeout=_AUDIO_DOWNLOAD_TIMEOUT_S)
        # Without this check, an HTML error page (404/500) would be sent to
        # the model as if it were audio.
        download.raise_for_status()
        audio_bytes = download.content
        if not audio_bytes:
            raise ValueError("downloaded audio file is empty")

        mime_type = _guess_audio_mime_type(
            audio_url, download.headers.get("Content-Type")
        )
        audio = types.Part.from_bytes(data=audio_bytes, mime_type=mime_type)

        # Send the prompt and the audio to Gemini 2.0 Flash.
        response = gemini_llm.models.generate_content(
            model="gemini-2.0-flash",
            contents=[text, audio],
        )
        result_text = response.text
        # NOTE(review): response.text is presumably None when the model
        # returns no text part (e.g. a blocked response) - confirm against
        # the SDK. Report it as an error rather than returning None.
        if result_text is None:
            raise ValueError("model returned no text for this audio")
        print(result_text)
        return result_text
    except Exception as e:
        # Tool boundary: report every failure to the caller as data.
        return {"error": f"Error analyzing audio: {str(e)}"}