Spaces:
Sleeping
Sleeping
File size: 2,369 Bytes
c0c9f39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
"""Voice Module - AI description + ElevenLabs TTS"""
import logging
from typing import Tuple, Optional
from pathlib import Path
from ..config import get_config
from ..core.analyzer import CodeAnalyzer
from .elevenlabs import VoiceNarrator
logger = logging.getLogger("codeatlas.voice")
NARRATION_PROMPT = """Analyze this architecture diagram and provide a brief, conversational summary suitable for audio narration.
Keep it under 200 words. Focus on what the codebase does, key components and their relationships, and the overall architecture pattern.
Provide a natural, spoken summary (no bullet points, no markdown)."""
def generate_audio_summary(
dot_source: str,
gemini_api_key: Optional[str] = None,
elevenlabs_api_key: Optional[str] = None,
model_name: Optional[str] = None,
voice_id: Optional[str] = None,
) -> Tuple[Optional[Path], str]:
config = get_config()
gemini_key = gemini_api_key or config.gemini_api_key
elevenlabs_key = elevenlabs_api_key or config.elevenlabs_api_key
if not elevenlabs_key:
return None, "⚠️ ElevenLabs API key not set. Go to Settings."
if not gemini_key:
return None, "⚠️ Gemini API key not set. Go to Settings."
if not dot_source:
return None, "⚠️ No diagram loaded. Generate or load a diagram first."
try:
logger.info("Generating description for audio...")
analyzer = CodeAnalyzer(api_key=gemini_key, model_name="gemini-2.0-flash")
prompt = f"{NARRATION_PROMPT}\n\nDOT diagram:\n```\n{dot_source}\n```"
result = analyzer.chat(prompt, "", None)
if not result.success or not result.content:
return None, f"⚠️ Failed to generate description: {result.error or 'Empty response'}"
logger.info(f"Generated description: {len(result.content)} chars")
narrator = VoiceNarrator(api_key=elevenlabs_key)
if not narrator.available:
return None, "⚠️ ElevenLabs not available"
audio_path, error = narrator.generate(result.content, voice_id)
if error:
return None, f"❌ {error}"
return audio_path, "✅ Audio generated!"
except Exception as e:
logger.exception("Audio generation failed")
return None, f"❌ Error: {str(e)}"
|